diff options
Diffstat (limited to 'src/gallium')
197 files changed, 3467 insertions, 2745 deletions
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index b5a9938c74..1ba0724949 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -37,7 +37,7 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURC $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(GENERATED_SOURCES) 2> /dev/null $(PROGS): % : %.o - $(LD) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group + $(LD) $(LDFLAGS) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group # Emacs tags tags: diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index f8e65cf6c6..2daed382cf 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -101,6 +101,7 @@ C_SOURCES = \ util/u_blit.c \ util/u_blitter.c \ util/u_cache.c \ + util/u_caps.c \ util/u_cpu_detect.c \ util/u_dl.c \ util/u_draw_quad.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index db3a1e7311..a0673df8a8 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -144,6 +144,7 @@ source = [ 'util/u_blit.c', 'util/u_blitter.c', 'util/u_cache.c', + 'util/u_caps.c', 'util/u_cpu_detect.c', 'util/u_debug.c', 'util/u_debug_memory.c', diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 6fd4bd3642..c5fe7efa02 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -313,10 +313,13 @@ void cso_release_all( struct cso_context *ctx ) } +/** + * Free the CSO context. NOTE: the state tracker should have previously called + * cso_release_all(). 
+ */ void cso_destroy_context( struct cso_context *ctx ) { if (ctx) { - /*cso_release_all( ctx );*/ FREE( ctx ); } } @@ -349,6 +352,7 @@ enum pipe_error cso_set_blend(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; + memset(&cso->state, 0, sizeof cso->state); memcpy(&cso->state, templ, key_size); cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 9d11031769..2c234285b5 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -24,6 +24,8 @@ /* generates the draw jit function */ static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); +static void +draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); static void init_globals(struct draw_llvm *llvm) @@ -205,7 +207,9 @@ draw_llvm_create(struct draw_context *draw) void draw_llvm_destroy(struct draw_llvm *llvm) { - free(llvm); + LLVMDisposePassManager(llvm->pass); + + FREE(llvm); } struct draw_llvm_variant * @@ -218,6 +222,7 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); draw_llvm_generate(llvm, variant); + draw_llvm_generate_elts(llvm, variant); return variant; } @@ -252,7 +257,8 @@ generate_vs(struct draw_llvm *llvm, NULL /*pos*/, inputs, outputs, - NULL/*sampler*/); + NULL/*sampler*/, + &llvm->draw->vs.vertex_shader->info); } #if DEBUG_STORE @@ -285,10 +291,16 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &indices, 1, ""); LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); + LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf); LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); - LLVMValueRef stride = LLVMBuildMul(builder, - 
vb_stride, - index, ""); + LLVMValueRef cond; + LLVMValueRef stride; + + cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); + + index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); + + stride = LLVMBuildMul(builder, vb_stride, index, ""); vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); @@ -689,6 +701,158 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_disassemble(variant->jit_func); } + +static void +draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +{ + LLVMTypeRef arg_types[7]; + LLVMTypeRef func_type; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; + LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + struct draw_context *draw = llvm->draw; + unsigned i, j; + struct lp_build_context bld; + struct lp_build_context bld_int; + struct lp_build_loop_state lp_loop; + struct lp_type vs_type = lp_type_float_vec(32); + const int max_vertices = 4; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; + LLVMValueRef fetch_max; + + arg_types[0] = llvm->context_ptr_type; /* context */ + arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ + arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ + arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ + arg_types[4] = LLVMInt32Type(); /* fetch_count */ + arg_types[5] = LLVMInt32Type(); /* stride */ + arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); + LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute); + + 
context_ptr = LLVMGetParam(variant->function_elts, 0); + io_ptr = LLVMGetParam(variant->function_elts, 1); + vbuffers_ptr = LLVMGetParam(variant->function_elts, 2); + fetch_elts = LLVMGetParam(variant->function_elts, 3); + fetch_count = LLVMGetParam(variant->function_elts, 4); + stride = LLVMGetParam(variant->function_elts, 5); + vb_ptr = LLVMGetParam(variant->function_elts, 6); + + lp_build_name(context_ptr, "context"); + lp_build_name(io_ptr, "io"); + lp_build_name(vbuffers_ptr, "vbuffers"); + lp_build_name(fetch_elts, "fetch_elts"); + lp_build_name(fetch_count, "fetch_count"); + lp_build_name(stride, "stride"); + lp_build_name(vb_ptr, "vb"); + + /* + * Function body + */ + + block = LLVMAppendBasicBlock(variant->function_elts, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld_int, builder, lp_type_int(32)); + + step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + + fetch_max = LLVMBuildSub(builder, fetch_count, + LLVMConstInt(LLVMInt32Type(), 1, 0), + "fetch_max"); + + lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); + { + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; + LLVMValueRef io; + const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + + io_itr = lp_loop.counter; + io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); +#if DEBUG_STORE + lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", + io_itr, io, lp_loop.counter); +#endif + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef true_index = LLVMBuildAdd( + builder, + lp_loop.counter, + LLVMConstInt(LLVMInt32Type(), i, 0), ""); + LLVMValueRef fetch_ptr; + + /* make sure we're not out of bounds which can happen + * if fetch_count % 4 != 0, because on the last iteration + * a few of the 4 vertex fetches will be out of bounds */ + true_index = lp_build_min(&bld_int, 
true_index, fetch_max); + + fetch_ptr = LLVMBuildGEP(builder, fetch_elts, + &true_index, 1, ""); + true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; + LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), + velem->vertex_buffer_index, + 0); + LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, + &vb_index, 1, ""); + generate_fetch(builder, vbuffers_ptr, + &aos_attribs[j][i], velem, vb, true_index); + } + } + convert_to_soa(builder, aos_attribs, inputs, + draw->pt.nr_vertex_elements); + + ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; + generate_vs(llvm, + builder, + outputs, + ptr_aos, + context_ptr); + + convert_to_aos(builder, io, outputs, + draw->vs.vertex_shader->info.num_outputs, + max_vertices); + } + lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); + + LLVMBuildRetVoid(builder); + + LLVMDisposeBuilder(builder); + + /* + * Translate the LLVM IR into machine code. 
+ */ +#ifdef DEBUG + if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function_elts); + assert(0); + } +#endif + + LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); + + if (0) { + LLVMDumpValue(variant->function_elts); + debug_printf("\n"); + } + variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal( + llvm->draw->engine, variant->function_elts); + + if (0) + lp_disassemble(variant->jit_func_elts); +} + void draw_llvm_make_variant_key(struct draw_llvm *llvm, struct draw_llvm_variant_key *key) diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 28b9044a81..58fee7f9d6 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -78,7 +78,10 @@ struct draw_jit_context #define draw_jit_vbuffer_stride(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 0, "stride") -#define draw_jit_vbuffer_offset(_builder, _ptr) \ +#define draw_jit_vbuffer_max_index(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 1, "max_index") + +#define draw_jit_vbuffer_offset(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 2, "buffer_offset") @@ -91,6 +94,16 @@ typedef void unsigned stride, struct pipe_vertex_buffer *vertex_buffers); + +typedef void +(*draw_jit_vert_func_elts)(struct draw_jit_context *context, + struct vertex_header *io, + const char *vbuffers[PIPE_MAX_ATTRIBS], + const unsigned *fetch_elts, + unsigned fetch_count, + unsigned stride, + struct pipe_vertex_buffer *vertex_buffers); + struct draw_llvm { struct draw_context *draw; @@ -119,7 +132,9 @@ struct draw_llvm_variant { struct draw_llvm_variant_key key; LLVMValueRef function; + LLVMValueRef function_elts; draw_jit_vert_func jit_func; + draw_jit_vert_func_elts jit_func_elts; struct draw_llvm_variant *next; }; diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index ee2b811603..abbf6247ab 100644 --- 
a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -137,7 +137,7 @@ emit_vertex( struct vbuf_stage *vbuf, */ /* Note: we really do want data[0] here, not data[pos]: */ - vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); + vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0, ~0); vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr); if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); @@ -271,7 +271,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) translate_key_sanitize(&hw_key); vbuf->translate = translate_cache_find(vbuf->cache, &hw_key); - vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0); + vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0, ~0); } vbuf->point_size = vbuf->stage.draw->rasterizer->point_size; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 265a420d01..ab16706581 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -54,7 +54,6 @@ static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage ) /** * Draw a wide line by drawing a quad (two triangles). - * XXX need to disable polygon stipple. 
*/ static void wideline_line( struct draw_stage *stage, struct prim_header *header ) diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4bb3282f62..a2bfb693c0 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -132,6 +132,7 @@ struct draw_context struct draw_pt_middle_end *fetch_emit; struct draw_pt_middle_end *fetch_shade_emit; struct draw_pt_middle_end *general; + struct draw_pt_middle_end *llvm; } middle; struct { @@ -253,8 +254,8 @@ struct draw_context #ifdef HAVE_LLVM LLVMExecutionEngineRef engine; - boolean use_llvm; #endif + void *driver_private; }; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index b5876bb1bd..b853f3a89f 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -37,6 +37,13 @@ #include "util/u_math.h" #include "util/u_prim.h" + +DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE) +#ifdef HAVE_LLVM +DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE) +#endif + static unsigned trim( unsigned count, unsigned first, unsigned incr ) { if (count < first) @@ -90,12 +97,16 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_SHADE; } - if (opt == 0) - middle = draw->pt.middle.fetch_emit; - else if (opt == PT_SHADE && !draw->pt.no_fse) - middle = draw->pt.middle.fetch_shade_emit; - else - middle = draw->pt.middle.general; + if (draw->pt.middle.llvm) { + middle = draw->pt.middle.llvm; + } else { + if (opt == 0) + middle = draw->pt.middle.fetch_emit; + else if (opt == PT_SHADE && !draw->pt.no_fse) + middle = draw->pt.middle.fetch_shade_emit; + else + middle = draw->pt.middle.general; + } /* Pick the right frontend @@ -122,8 +133,8 @@ draw_pt_arrays(struct draw_context *draw, boolean draw_pt_init( struct draw_context *draw ) { - draw->pt.test_fse = debug_get_bool_option("DRAW_FSE", 
FALSE); - draw->pt.no_fse = debug_get_bool_option("DRAW_NO_FSE", FALSE); + draw->pt.test_fse = debug_get_option_draw_fse(); + draw->pt.no_fse = debug_get_option_draw_no_fse(); draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) @@ -141,25 +152,26 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_shade_emit) return FALSE; -#if HAVE_LLVM - draw->use_llvm = debug_get_bool_option("DRAW_USE_LLVM", TRUE); - if (draw->use_llvm) - draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit_llvm( draw ); -#else - draw->pt.middle.general = NULL; -#endif - - if (!draw->pt.middle.general) - draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); if (!draw->pt.middle.general) return FALSE; +#if HAVE_LLVM + if (debug_get_option_draw_use_llvm()) + draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw ); +#endif + return TRUE; } void draw_pt_destroy( struct draw_context *draw ) { + if (draw->pt.middle.llvm) { + draw->pt.middle.llvm->destroy( draw->pt.middle.llvm ); + draw->pt.middle.llvm = NULL; + } + if (draw->pt.middle.general) { draw->pt.middle.general->destroy( draw->pt.middle.general ); draw->pt.middle.general = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index a7917f54b0..ad48fa39a4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -171,12 +171,14 @@ void draw_pt_emit( struct pt_emit *emit, translate->set_buffer(translate, 0, vertex_data, - stride ); + stride, + ~0); translate->set_buffer(translate, 1, &draw->rasterizer->point_size, - 0); + 0, + ~0); translate->run( translate, 0, @@ -232,11 +234,11 @@ void draw_pt_emit_linear(struct pt_emit *emit, goto fail; translate->set_buffer(translate, 0, - vertex_data, stride); + vertex_data, stride, count - 1); translate->set_buffer(translate, 1, &draw->rasterizer->point_size, - 0); 
+ 0, ~0); translate->run(translate, 0, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 252be5053e..a1347221b5 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -149,7 +149,8 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, fetch->translate->set_buffer(fetch->translate, draw->pt.nr_vertex_buffers, &vh, - 0); + 0, + ~0); } } @@ -172,7 +173,8 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride ); + draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_buffer[i].max_index); } translate->run_elts( translate, @@ -198,7 +200,8 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride ); + draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_buffer[i].max_index); } translate->run( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 1994ddf2bc..d7735bf1ac 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -168,7 +168,8 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, feme->translate->set_buffer(feme->translate, draw->pt.nr_vertex_buffers, &feme->point_size, - 0); + 0, + ~0); } feme->point_size = draw->rasterizer->point_size; @@ -178,7 +179,8 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride ); + draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_buffer[i].max_index); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 389e2b105e..cbb5b6c960 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -167,7 +167,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle, i, ((const ubyte *) draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride ); + draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_buffer[i].max_index ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index f71271bd91..35913a5995 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -167,8 +167,6 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vshader = draw->vs.vertex_shader; - struct draw_geometry_shader *gshader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( fetch_count, 4 ); @@ -182,35 +180,13 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, return; } - /* Fetch into our vertex buffer - */ - draw_pt_fetch_run( fpme->fetch, - fetch_elts, - fetch_count, - (char *)pipeline_verts ); - - /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. 
If there is no shader, eg if - * bypass_vs_clip_and_viewport, then the inputs == outputs, and are - * already in the correct place.*/ - if (opt & PT_SHADE) - { - vshader->run_linear(vshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - if (gshader) - draw_geometry_shader_run(gshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - } + fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, + pipeline_verts, + (const char **)draw->pt.user.vbuffer, + fetch_elts, + fetch_count, + fpme->vertex_size, + draw->pt.vertex_buffer ); if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, @@ -373,7 +349,31 @@ static void llvm_middle_end_finish( struct draw_pt_middle_end *middle ) static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_llvm_variant *variant = NULL; + variant = fpme->variants; + while(variant) { + struct draw_llvm_variant *next = variant->next; + + if (variant->function_elts) { + if (variant->function_elts) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function_elts); + LLVMDeleteFunction(variant->function_elts); + } + + if (variant->function) { + if (variant->function) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + + FREE(variant); + + variant = next; + } if (fpme->fetch) draw_pt_fetch_destroy( fpme->fetch ); diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index cfd5154024..c2832eefa2 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -46,7 +46,7 @@ #include "tgsi/tgsi_dump.h" #include 
"tgsi/tgsi_exec.h" - +DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE) void draw_vs_set_constants(struct draw_context *draw, @@ -157,7 +157,7 @@ draw_delete_vertex_shader(struct draw_context *draw, boolean draw_vs_init( struct draw_context *draw ) { - draw->dump_vs = debug_get_bool_option("GALLIUM_DUMP_VS", FALSE); + draw->dump_vs = debug_get_option_gallium_dump_vs(); draw->vs.machine = tgsi_exec_machine_create(); if (!draw->vs.machine) diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index f49332352b..6c7e94db43 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -80,7 +80,8 @@ struct draw_vs_varient { void (*set_buffer)( struct draw_vs_varient *, unsigned i, const void *ptr, - unsigned stride ); + unsigned stride, + unsigned max_stride ); void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, unsigned start, @@ -168,8 +169,9 @@ draw_create_vs_ppc(struct draw_context *draw, struct draw_vs_varient_key; struct draw_vertex_shader; -struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_varient * +draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); @@ -187,8 +189,9 @@ struct translate *draw_vs_get_fetch( struct draw_context *draw, struct translate *draw_vs_get_emit( struct draw_context *draw, struct translate_key *key ); -struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_varient * +draw_vs_create_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index e7121f3654..19f49e34c8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2089,13 +2089,21 @@ 
static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, } +/** cast wrapper */ +static INLINE struct draw_vs_varient_aos_sse * +draw_vs_varient_aos_sse(struct draw_vs_varient *varient) +{ + return (struct draw_vs_varient_aos_sse *) varient; +} + static void vaos_set_buffer( struct draw_vs_varient *varient, unsigned buf, const void *ptr, - unsigned stride ) + unsigned stride, + unsigned max_stride) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); if (buf < vaos->nr_vb) { vaos->buffer[buf].base_ptr = (char *)ptr; @@ -2112,7 +2120,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2136,7 +2144,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2165,7 +2173,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, static void vaos_destroy( struct draw_vs_varient *varient ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); FREE( vaos->buffer ); @@ -2241,13 +2249,14 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, } -struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_varient * 
+draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) { struct draw_vs_varient *varient = varient_aos_sse( vs, key ); if (varient == NULL) { - varient = draw_vs_varient_generic( vs, key ); + varient = draw_vs_create_varient_generic( vs, key ); } return varient; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 7deca2b69d..bc34d390da 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -203,7 +203,7 @@ draw_create_vs_exec(struct draw_context *draw, vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->machine = draw->vs.machine; return &vs->base; diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index d869eecec5..5df84916c5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -125,7 +125,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, */ shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4]) shader->base.immediates, - (const float (*)[4])constants[0], + (float (*)[4])constants[0], ppc_builtin_constants); /* convert (up to) four output verts from SoA back to AoS format */ @@ -190,7 +190,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.create_varient = draw_vs_varient_aos_ppc; else #endif - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->base.prepare = vs_ppc_prepare; vs->base.run_linear = vs_ppc_run_linear; vs->base.delete = vs_ppc_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 54e6423388..14c95082a9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ 
b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -165,9 +165,9 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.draw = draw; if (1) - vs->base.create_varient = draw_vs_varient_aos_sse; + vs->base.create_varient = draw_vs_create_varient_aos_sse; else - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 5ed706cb4f..6eb26927f2 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -66,14 +66,16 @@ struct draw_vs_varient_generic { static void vsvg_set_buffer( struct draw_vs_varient *varient, unsigned buffer, const void *ptr, - unsigned stride ) + unsigned stride, + unsigned max_index ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; vsvg->fetch->set_buffer(vsvg->fetch, buffer, ptr, - stride); + stride, + max_index ); } @@ -172,12 +174,14 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->emit->set_buffer( vsvg->emit, 0, temp_buffer, - temp_vertex_stride ); + temp_vertex_stride, + ~0 ); vsvg->emit->set_buffer( vsvg->emit, 1, &vsvg->draw->rasterizer->point_size, - 0); + 0, + ~0 ); vsvg->emit->run( vsvg->emit, 0, count, @@ -232,12 +236,14 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->emit->set_buffer( vsvg->emit, 0, temp_buffer, - temp_vertex_stride ); + temp_vertex_stride, + ~0 ); vsvg->emit->set_buffer( vsvg->emit, 1, &vsvg->draw->rasterizer->point_size, - 0); + 0, + ~0 ); vsvg->emit->run( vsvg->emit, 0, count, @@ -257,8 +263,9 @@ static void vsvg_destroy( struct draw_vs_varient *varient ) } -struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct 
draw_vs_varient * +draw_vs_create_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) { unsigned i; struct translate_key fetch, emit; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 8e8fcccf56..20ae958714 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1210,6 +1210,14 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a) { +#ifdef PIPE_OS_WINDOWS + /* + * FIXME: X86 backend translates llvm.cos.v4f32 to 4 calls to CRT's cosf() + * which is neither efficient nor does the CRT linkage work on Windows + * causing segmentation fault. So simply disable the code for now. + */ + return bld->one; +#else const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -1220,6 +1228,7 @@ lp_build_cos(struct lp_build_context *bld, util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width); return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +#endif } @@ -1230,6 +1239,14 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { +#ifdef PIPE_OS_WINDOWS + /* + * FIXME: X86 backend translates llvm.sin.v4f32 to 4 calls to CRT's sinf() + * which is neither efficient nor does the CRT linkage work on Windows + * causing segmentation fault. So simply disable the code for now. 
+ */ + return bld->zero; +#else const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -1240,6 +1257,7 @@ lp_build_sin(struct lp_build_context *bld, util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width); return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +#endif } @@ -1502,8 +1520,10 @@ lp_build_log2_approx(struct lp_build_context *bld, res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); } - if(p_exp) + if(p_exp) { + exp = LLVMBuildBitCast(bld->builder, exp, vec_type, ""); *p_exp = exp; + } if(p_floor_log2) *p_floor_log2 = logexp; @@ -1573,8 +1593,10 @@ lp_build_float_log2_approx(struct lp_build_context *bld, res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); } - if(p_exp) + if(p_exp) { + exp = LLVMBuildBitCast(bld->builder, exp, float_type, ""); *p_exp = exp; + } if(p_floor_log2) *p_floor_log2 = logexp; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index e60ab4f6ba..8f15b1d287 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -792,3 +792,78 @@ lp_build_endif(struct lp_build_if_state *ctx) /* Resume building code at end of the ifthen->merge_block */ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); } + + +/** + * Allocate a scalar (or vector) variable. + * + * Although not strictly part of control flow, control flow has a deep impact on + * how variables should be allocated. + * + * The mem2reg optimization pass is the recommended way of dealing with mutable + * variables, and SSA. It looks for allocas and if it can handle them, it + * promotes them, but only looks for alloca instructions in the entry block of + * the function. Being in the entry block guarantees that the alloca is only + * executed once, which makes analysis simpler. 
+ * + * See also: + * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory + */ +LLVMValueRef +lp_build_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + const char *name) +{ + LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); + LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); + LLVMBuilderRef first_builder = LLVMCreateBuilder(); + LLVMValueRef res; + + LLVMPositionBuilderAtEnd(first_builder, first_block); + LLVMPositionBuilderBefore(first_builder, first_instr); + + res = LLVMBuildAlloca(first_builder, type, name); + + LLVMDisposeBuilder(first_builder); + + return res; +} + + +/** + * Allocate an array of scalars/vectors. + * + * mem2reg pass is not capable of promoting structs or arrays to registers, but + * we still put it in the first block anyway as failure to put allocas in the + * first block may prevent the X86 backend from successfully aligning the stack as + * required. + * + * Also the scalarrepl pass is supposedly more powerful and can promote + * arrays in many cases. 
+ * + * See also: + * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory + */ +LLVMValueRef +lp_build_array_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + LLVMValueRef count, + const char *name) +{ + LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); + LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); + LLVMBuilderRef first_builder = LLVMCreateBuilder(); + LLVMValueRef res; + + LLVMPositionBuilderBefore(first_builder, first_instr); + + res = LLVMBuildArrayAlloca(first_builder, type, count, name); + + LLVMDisposeBuilder(first_builder); + + return res; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 745838570c..fffb493a93 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -156,5 +156,15 @@ lp_build_endif(struct lp_build_if_state *ctx); LLVMBasicBlockRef lp_build_insert_new_block(LLVMBuilderRef builder, const char *name); +LLVMValueRef +lp_build_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + const char *name); + +LLVMValueRef +lp_build_array_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + LLVMValueRef count, + const char *name); #endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index a5a019fa92..6257e9a404 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -40,6 +40,7 @@ #include "lp_bld_init.h" #include "lp_bld_type.h" +#include "lp_bld_flow.h" #include "lp_bld_format.h" @@ -370,11 +371,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); } - /* - * XXX: this should better go to the first block in the function - */ - - 
tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); + tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index de07c222a3..5067d0a164 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -27,6 +27,7 @@ #include "pipe/p_compiler.h" +#include "util/u_cpu_detect.h" #include "util/u_debug.h" #include "lp_bld_init.h" @@ -62,6 +63,15 @@ lp_build_init(void) if (!lp_build_target) lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine); + + util_cpu_detect(); + +#if 0 + /* For simulating less capable machines */ + util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_ssse3 = 0; + util_cpu_caps.has_sse4_1 = 0; +#endif } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index a3b6970116..d13fa1a5d0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -472,18 +472,6 @@ lp_build_select_aos(struct lp_build_context *bld, } } -LLVMValueRef -lp_build_alloca(struct lp_build_context *bld) -{ - const struct lp_type type = bld->type; - - if (type.length > 1) { /*vector*/ - return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), ""); - } else { /*scalar*/ - return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), ""); - } -} - /** Return (a & ~b) */ LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index 00a8c75019..29f9fc3b20 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -76,9 +76,6 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef b, const boolean cond[4]); -LLVMValueRef -lp_build_alloca(struct lp_build_context *bld); - 
LLVMValueRef lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index eb75b9b393..195a4953ab 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -77,6 +77,11 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, */ state->format = view->format; + state->swizzle_r = view->swizzle_r; + state->swizzle_g = view->swizzle_g; + state->swizzle_b = view->swizzle_b; + state->swizzle_a = view->swizzle_a; + state->target = texture->target; state->pot_width = util_is_pot(texture->width0); state->pot_height = util_is_pot(texture->height0); @@ -181,54 +186,16 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef offset; x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8); + offset = lp_build_mul(bld, x, x_stride); - if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - LLVMValueRef x_lo, x_hi; - LLVMValueRef y_lo, y_hi; - LLVMValueRef x_stride_lo, x_stride_hi; - LLVMValueRef y_stride_lo, y_stride_hi; - LLVMValueRef x_offset_lo, x_offset_hi; - LLVMValueRef y_offset_lo, y_offset_hi; - LLVMValueRef offset_lo, offset_hi; - - /* XXX 1D & 3D addressing not done yet */ - assert(!z); - assert(!z_stride); - - x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); - y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); - - x_hi = LLVMBuildLShr(bld->builder, x, bld->one, ""); - y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); - - x_stride_lo = x_stride; - y_stride_lo = lp_build_const_vec(bld->type, 2*format_desc->block.bits/8); - - x_stride_hi = lp_build_const_vec(bld->type, 4*format_desc->block.bits/8); - y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); - - x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); - y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo); - offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo); - - 
x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi); - y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi); - offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi); - - offset = lp_build_add(bld, offset_hi, offset_lo); + if (y && y_stride) { + LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); + offset = lp_build_add(bld, offset, y_offset); } - else { - offset = lp_build_mul(bld, x, x_stride); - - if (y && y_stride) { - LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); - offset = lp_build_add(bld, offset, y_offset); - } - - if (z && z_stride) { - LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); - offset = lp_build_add(bld, offset, z_offset); - } + + if (z && z_stride) { + LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); + offset = lp_build_add(bld, offset, z_offset); } return offset; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index e287376385..8ceb20473d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -54,8 +54,14 @@ struct lp_build_context; */ struct lp_sampler_static_state { - /* pipe_texture's state */ + /* pipe_sampler_view's state */ enum pipe_format format; + unsigned swizzle_r:3; + unsigned swizzle_g:3; + unsigned swizzle_b:3; + unsigned swizzle_a:3; + + /* pipe_texture's state */ unsigned target:3; unsigned pot_width:1; unsigned pot_height:1; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index c9b613e21c..54c0ad7ce4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -185,6 +185,53 @@ texture_dims(enum pipe_texture_target tex) } +static LLVMValueRef +lp_build_swizzle_chan_soa(struct lp_type type, + const LLVMValueRef *unswizzled, + enum util_format_swizzle swizzle) +{ + switch (swizzle) { + case PIPE_SWIZZLE_RED: + case PIPE_SWIZZLE_GREEN: + case PIPE_SWIZZLE_BLUE: + 
case PIPE_SWIZZLE_ALPHA: + return unswizzled[swizzle]; + case PIPE_SWIZZLE_ZERO: + return lp_build_zero(type); + case PIPE_SWIZZLE_ONE: + return lp_build_one(type); + default: + assert(0); + return lp_build_undef(type); + } +} + + +static void +lp_build_swizzle_soa(struct lp_build_sample_context *bld, + LLVMValueRef *texel) +{ + LLVMValueRef unswizzled[4]; + unsigned char swizzles[4]; + unsigned chan; + + for (chan = 0; chan < 4; ++chan) { + unswizzled[chan] = texel[chan]; + } + + swizzles[0] = bld->static_state->swizzle_r; + swizzles[1] = bld->static_state->swizzle_g; + swizzles[2] = bld->static_state->swizzle_b; + swizzles[3] = bld->static_state->swizzle_a; + + for (chan = 0; chan < 4; ++chan) { + unsigned swizzle = swizzles[chan]; + texel[chan] = lp_build_swizzle_chan_soa(bld->texel_type, + unswizzled, swizzle); + } +} + + /** * Generate code to fetch a texel from a texture at int coords (x, y, z). @@ -278,6 +325,18 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, bld->format_desc, x, y, z, y_stride, z_stride); + if (use_border) { + /* If we can sample the border color, it means that texcoords may + * lie outside the bounds of the texture image. We need to do + * something to prevent reading out of bounds and causing a segfault. + * + * Simply AND the texture coords with !use_border. This will cause + * coords which are out of bounds to become zero. Zero's guaranteed + * to be inside the texture image. 
+ */ + offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border); + } + lp_build_fetch_rgba_soa(bld->builder, bld->format_desc, bld->texel_type, @@ -285,6 +344,8 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, i, j, texel); + lp_build_swizzle_soa(bld, texel); + /* * Note: if we find an app which frequently samples the texture border * we might want to implement a true conditional here to avoid sampling @@ -842,6 +903,7 @@ lp_build_minify(struct lp_build_sample_context *bld, * \param s vector of texcoord s values * \param t vector of texcoord t values * \param r vector of texcoord r values + * \param shader_lod_bias vector float with the shader lod bias, * \param width scalar int texture width * \param height scalar int texture height * \param depth scalar int texture depth @@ -851,6 +913,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + LLVMValueRef shader_lod_bias, LLVMValueRef width, LLVMValueRef height, LLVMValueRef depth) @@ -865,8 +928,8 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, else { const int dims = texture_dims(bld->static_state->target); struct lp_build_context *float_bld = &bld->float_bld; - LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), - bld->static_state->lod_bias); + LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(), + bld->static_state->lod_bias); LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), @@ -940,8 +1003,14 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, /* compute lod = log2(rho) */ lod = lp_build_log2(float_bld, rho); - /* add lod bias */ - lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias"); + /* add sampler lod bias */ + lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler LOD bias"); + + /* add shader lod bias */ + /* XXX for now we take only the first element since our lod is scalar */ + 
shader_lod_bias = LLVMBuildExtractElement(bld->builder, shader_lod_bias, + LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + lod = LLVMBuildAdd(bld->builder, lod, shader_lod_bias, "shader LOD bias"); /* clamp lod */ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); @@ -1527,6 +1596,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + LLVMValueRef lodbias, LLVMValueRef width, LLVMValueRef height, LLVMValueRef depth, @@ -1564,7 +1634,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, s, t, r, width, height, depth); + lod = lp_build_lod_selector(bld, s, t, r, lodbias, width, height, depth); } /* @@ -1772,6 +1842,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef unswizzled[4]; LLVMValueRef stride; + assert(bld->static_state->target == PIPE_TEXTURE_2D); + assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR); + assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); + assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE); + lp_build_context_init(&i32, builder, lp_type_int_vec(32)); lp_build_context_init(&h16, builder, lp_type_ufixed(16)); lp_build_context_init(&u8n, builder, lp_type_unorm(8)); @@ -1945,6 +2020,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, lp_build_format_swizzle_soa(bld->format_desc, bld->texel_type, unswizzled, texel); + + lp_build_swizzle_soa(bld, texel); } @@ -1984,6 +2061,24 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, /** + * Just set texels to white instead of actually sampling the texture. + * For debugging. 
+ */ +static void +lp_build_sample_nop(struct lp_build_sample_context *bld, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + unsigned chan; + + for (chan = 0; chan < 4; chan++) { + /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/ + texel[chan] = texel_bld->one; + } +} + + +/** * Build texture sampling code. * 'texel' will return a vector of four LLVMValueRefs corresponding to * R, G, B, A. @@ -2048,19 +2143,23 @@ lp_build_sample_soa(LLVMBuilderRef builder, height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); - if (util_format_is_rgba8_variant(bld.format_desc) && - static_state->target == PIPE_TEXTURE_2D && - static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && - is_simple_wrap_mode(static_state->wrap_s) && - is_simple_wrap_mode(static_state->wrap_t)) { + if (0) { + /* For debug: no-op texture sampling */ + lp_build_sample_nop(&bld, texel); + } + else if (util_format_is_rgba8_variant(bld.format_desc) && + static_state->target == PIPE_TEXTURE_2D && + static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && + is_simple_wrap_mode(static_state->wrap_s) && + is_simple_wrap_mode(static_state->wrap_t)) { /* special case */ lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, row_stride_array, data_array, texel); } else { - lp_build_sample_general(&bld, unit, s, t, r, + lp_build_sample_general(&bld, unit, s, t, r, lodbias, width, height, depth, width_vec, height_vec, depth_vec, row_stride_array, img_stride_array, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 63b938bfa9..2eac5da6c6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -39,6 +39,7 @@ struct tgsi_token; +struct tgsi_shader_info; struct lp_type; struct lp_build_context; struct lp_build_mask_context; @@ -78,7 +79,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], - struct lp_build_sampler_soa *sampler); + struct lp_build_sampler_soa *sampler, + struct tgsi_shader_info *info); #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 44f8aec1bf..d3c769e28b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -46,6 +46,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_scan.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" @@ -125,6 +126,12 @@ struct lp_build_tgsi_soa_context LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; + LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS]; + + /* we allocate an array of temps if we have indirect + * addressing and then the temps above is unused */ + LLVMValueRef temps_array; + boolean has_indirect_addressing; struct lp_build_mask_context *mask; struct lp_exec_mask exec_mask; @@ -169,8 +176,7 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context static void lp_exec_mask_update(struct lp_exec_mask *mask) { if (mask->loop_stack_size) { - /*for loops we need to update the entire mask at - * runtime */ + /*for loops we need to update the entire mask at runtime */ LLVMValueRef tmp; assert(mask->break_mask); tmp = LLVMBuildAnd(mask->bld->builder, @@ -232,6 +238,9 @@ static void lp_exec_bgnloop(struct lp_exec_mask *mask) mask->break_mask = LLVMConstAllOnes(mask->int_vec_type); if (mask->cond_stack_size == 0) mask->cond_mask = 
LLVMConstAllOnes(mask->int_vec_type); + + mask->break_stack[mask->break_stack_size++] = mask->break_mask; + mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); LLVMBuildBr(mask->bld->builder, mask->loop_block); @@ -246,7 +255,10 @@ static void lp_exec_break(struct lp_exec_mask *mask) mask->exec_mask, "break"); - mask->break_stack[mask->break_stack_size++] = mask->break_mask; + /* mask->break_stack_size > 1 implies that we encountered a break + * statement already and if that's the case we want to make sure + * our mask is a combination of the previous break and the current + * execution mask */ if (mask->break_stack_size > 1) { mask->break_mask = LLVMBuildAnd(mask->bld->builder, mask->break_mask, @@ -263,7 +275,6 @@ static void lp_exec_continue(struct lp_exec_mask *mask) mask->exec_mask, ""); - mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; if (mask->cont_stack_size > 1) { mask->cont_mask = LLVMBuildAnd(mask->bld->builder, mask->cont_mask, @@ -299,17 +310,23 @@ static void lp_exec_endloop(struct lp_exec_mask *mask) LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); mask->loop_block = mask->loop_stack[--mask->loop_stack_size]; - /* pop the break mask */ + /* pop the cont mask */ if (mask->cont_stack_size) { mask->cont_mask = mask->cont_stack[--mask->cont_stack_size]; } + /* pop the break mask */ if (mask->break_stack_size) { - mask->break_mask = mask->cont_stack[--mask->break_stack_size]; + mask->break_mask = mask->break_stack[--mask->break_stack_size]; } lp_exec_mask_update(mask); } +/* stores val into an address pointed to by dst. + * mask->exec_mask is used to figure out which bits of val + * should be stored into the address + * (0 means don't store this bit, 1 means do store). 
+ */ static void lp_exec_mask_store(struct lp_exec_mask *mask, LLVMValueRef val, LLVMValueRef dst) @@ -347,6 +364,23 @@ emit_ddy(struct lp_build_tgsi_soa_context *bld, return lp_build_sub(&bld->base, src_top, src_bottom); } +static LLVMValueRef +get_temp_ptr(struct lp_build_tgsi_soa_context *bld, + unsigned index, + unsigned swizzle, + boolean is_indirect, + LLVMValueRef addr) +{ + if (!bld->has_indirect_addressing) { + return bld->temps[index][swizzle]; + } else { + LLVMValueRef lindex = + LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0); + if (is_indirect) + lindex = lp_build_add(&bld->base, lindex, addr); + return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); + } +} /** * Register fetch. @@ -361,6 +395,7 @@ emit_fetch( const struct tgsi_full_src_register *reg = &inst->Src[index]; unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); LLVMValueRef res; + LLVMValueRef addr; switch (swizzle) { case TGSI_SWIZZLE_X: @@ -368,11 +403,34 @@ emit_fetch( case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: + if (reg->Register.Indirect) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); + unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index ); + addr = LLVMBuildLoad(bld->base.builder, + bld->addr[reg->Indirect.Index][swizzle], + ""); + /* for indexing we want integers */ + addr = LLVMBuildFPToSI(bld->base.builder, addr, + int_vec_type, ""); + addr = LLVMBuildExtractElement(bld->base.builder, + addr, LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + } + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); - LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); - LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + LLVMValueRef scalar, scalar_ptr; + + if 
(reg->Register.Indirect) { + /*lp_build_printf(bld->base.builder, + "\taddr = %d\n", addr);*/ + index = lp_build_add(&bld->base, index, addr); + } + scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); + scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + res = lp_build_broadcast_scalar(&bld->base, scalar); break; } @@ -387,11 +445,16 @@ emit_fetch( assert(res); break; - case TGSI_FILE_TEMPORARY: - res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], ""); + case TGSI_FILE_TEMPORARY: { + LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, + swizzle, + reg->Register.Indirect, + addr); + res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); if(!res) return bld->base.undef; break; + } default: assert( 0 ); @@ -469,6 +532,7 @@ emit_store( LLVMValueRef value) { const struct tgsi_full_dst_register *reg = &inst->Dst[index]; + LLVMValueRef addr; switch( inst->Instruction.Saturate ) { case TGSI_SAT_NONE: @@ -488,20 +552,39 @@ emit_store( assert(0); } + if (reg->Register.Indirect) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); + unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index ); + addr = LLVMBuildLoad(bld->base.builder, + bld->addr[reg->Indirect.Index][swizzle], + ""); + /* for indexing we want integers */ + addr = LLVMBuildFPToSI(bld->base.builder, addr, + int_vec_type, ""); + addr = LLVMBuildExtractElement(bld->base.builder, + addr, LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + } + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: lp_exec_mask_store(&bld->exec_mask, value, bld->outputs[reg->Register.Index][chan_index]); break; - case TGSI_FILE_TEMPORARY: - lp_exec_mask_store(&bld->exec_mask, value, - bld->temps[reg->Register.Index][chan_index]); + case TGSI_FILE_TEMPORARY: { + LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, + chan_index, + 
reg->Register.Indirect, + addr); + lp_exec_mask_store(&bld->exec_mask, value, temp_ptr); break; + } case TGSI_FILE_ADDRESS: - /* FIXME */ - assert(0); + lp_exec_mask_store(&bld->exec_mask, value, + bld->addr[reg->Indirect.Index][chan_index]); break; case TGSI_FILE_PREDICATE: @@ -656,62 +739,42 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, lp_build_mask_update(bld->mask, mask); } - -/** - * Check if inst src/dest regs use indirect addressing into temporary - * register file. - */ -static boolean -indirect_temp_reference(const struct tgsi_full_instruction *inst) -{ - uint i; - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->Src[i]; - if (reg->Register.File == TGSI_FILE_TEMPORARY && - reg->Register.Indirect) - return TRUE; - } - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->Dst[i]; - if (reg->Register.File == TGSI_FILE_TEMPORARY && - reg->Register.Indirect) - return TRUE; - } - return FALSE; -} - static int emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) { + LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); + unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx, i; - LLVMBasicBlockRef current_block = - LLVMGetInsertBlock(bld->base.builder); - LLVMBasicBlockRef first_block = - LLVMGetEntryBasicBlock( - LLVMGetBasicBlockParent(current_block)); - LLVMValueRef first_inst = - LLVMGetFirstInstruction(first_block); - - /* we want alloca's to be the first instruction - * in the function so we need to rewind the builder - * to the very beginning */ - LLVMPositionBuilderBefore(bld->base.builder, - first_inst); for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: - for (i = 0; i < NUM_CHANNELS; i++) - bld->temps[idx][i] = lp_build_alloca(&bld->base); + if (bld->has_indirect_addressing) { + LLVMValueRef val = 
LLVMConstInt(LLVMInt32Type(), + last*4 + 4, 0); + bld->temps_array = lp_build_array_alloca(bld->base.builder, + vec_type, val, ""); + } else { + for (i = 0; i < NUM_CHANNELS; i++) + bld->temps[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); + } break; case TGSI_FILE_OUTPUT: for (i = 0; i < NUM_CHANNELS; i++) - bld->outputs[idx][i] = lp_build_alloca(&bld->base); + bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); + break; + + case TGSI_FILE_ADDRESS: + for (i = 0; i < NUM_CHANNELS; i++) + bld->addr[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); break; default: @@ -720,8 +783,6 @@ emit_declaration( } } - LLVMPositionBuilderAtEnd(bld->base.builder, - current_block); return TRUE; } @@ -747,10 +808,6 @@ emit_instruction( LLVMValueRef res; LLVMValueRef dst0[NUM_CHANNELS]; - /* we can't handle indirect addressing into temp register file yet */ - if (indirect_temp_reference(inst)) - return FALSE; - /* * Stores and write masks are handled in a general fashion after the long * instruction opcode switch statement. 
@@ -770,17 +827,13 @@ emit_instruction( } switch (inst->Instruction.Opcode) { -#if 0 case TGSI_OPCODE_ARL: - /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_flr(bld, 0, 0); - emit_f2it( bld, 0 ); + tmp0 = lp_build_floor(&bld->base, tmp0); dst0[chan_index] = tmp0; } break; -#endif case TGSI_OPCODE_MOV: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1350,17 +1403,13 @@ emit_instruction( return FALSE; break; -#if 0 case TGSI_OPCODE_ARR: - /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_rnd( bld, 0, 0 ); - emit_f2it( bld, 0 ); + tmp0 = lp_build_round(&bld->base, tmp0); dst0[chan_index] = tmp0; } break; -#endif case TGSI_OPCODE_BRA: /* deprecated */ @@ -1540,22 +1589,10 @@ emit_instruction( lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; - case TGSI_OPCODE_BGNFOR: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_BGNLOOP: lp_exec_bgnloop(&bld->exec_mask); break; - case TGSI_OPCODE_REP: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_ELSE: lp_exec_mask_cond_invert(&bld->exec_mask); break; @@ -1564,22 +1601,10 @@ emit_instruction( lp_exec_mask_cond_pop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDFOR: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_ENDLOOP: lp_exec_endloop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDREP: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_PUSHA: /* deprecated? 
*/ assert(0); @@ -1710,7 +1735,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], - struct lp_build_sampler_soa *sampler) + struct lp_build_sampler_soa *sampler, + struct tgsi_shader_info *info) { struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; @@ -1726,6 +1752,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.outputs = outputs; bld.consts_ptr = consts_ptr; bld.sampler = sampler; + bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 || + info->opcode_count[TGSI_OPCODE_ARL] > 0; lp_exec_mask_init(&bld.exec_mask, &bld.base); @@ -1746,10 +1774,10 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, case TGSI_TOKEN_TYPE_INSTRUCTION: { unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) + const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode); + if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info )) _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - info ? 
info->mnemonic : "<invalid>"); + opcode_info->mnemonic); } break; diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index 07a4268fc0..c09e8a7a76 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -302,6 +302,7 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier) typedef struct { unsigned count; unsigned waiters; + uint64_t sequence; pipe_mutex mutex; pipe_condvar condvar; } pipe_barrier; @@ -310,6 +311,7 @@ static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) { barrier->count = count; barrier->waiters = 0; + barrier->sequence = 0; pipe_mutex_init(barrier->mutex); pipe_condvar_init(barrier->condvar); } @@ -329,9 +331,14 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier) barrier->waiters++; if (barrier->waiters < barrier->count) { - pipe_condvar_wait(barrier->condvar, barrier->mutex); + uint64_t sequence = barrier->sequence; + + do { + pipe_condvar_wait(barrier->condvar, barrier->mutex); + } while (sequence == barrier->sequence); } else { barrier->waiters = 0; + barrier->sequence++; pipe_condvar_broadcast(barrier->condvar); } diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index 080fd4c731..5d9eed9258 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -661,25 +661,6 @@ TGSI Instruction Specification TBD -1.9.8 BGNFOR - Begin a For-Loop - - dst.x = floor(src.x) - dst.y = floor(src.y) - dst.z = floor(src.z) - - if (dst.y <= 0) - pc = [matching ENDFOR] + 1 - endif - - Note: The destination must be a loop register. - The source must be a constant register. 
- - -1.9.9 REP - Repeat - - TBD - - 1.9.10 ELSE - Else TBD @@ -690,23 +671,6 @@ TGSI Instruction Specification TBD -1.9.12 ENDFOR - End a For-Loop - - dst.x = dst.x + dst.z - dst.y = dst.y - 1.0 - - if (dst.y > 0) - pc = [matching BGNFOR instruction] + 1 - endif - - Note: The destination must be a loop register. - - -1.9.13 ENDREP - End Repeat - - TBD - - 1.10 GL_NV_vertex_program3 --------------------------- diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 57031419f8..8300020018 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -586,7 +586,6 @@ iter_instruction( /* update indentation */ if (inst->Instruction.Opcode == TGSI_OPCODE_IF || inst->Instruction.Opcode == TGSI_OPCODE_ELSE || - inst->Instruction.Opcode == TGSI_OPCODE_BGNFOR || inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) { ctx->indentation += indent_spaces; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 11045e4ba2..82eac05dc4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3186,14 +3186,6 @@ exec_instruction( *pc = -1; break; - case TGSI_OPCODE_REP: - assert (0); - break; - - case TGSI_OPCODE_ENDREP: - assert (0); - break; - case TGSI_OPCODE_PUSHA: assert (0); break; @@ -3258,29 +3250,6 @@ exec_instruction( emit_primitive(mach); break; - case TGSI_OPCODE_BGNFOR: - assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - for (chan_index = 0; chan_index < 3; chan_index++) { - FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); - } - ++mach->LoopCounterStackTop; - STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); - /* update LoopMask */ - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { - mach->LoopMask &= ~0x1; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 
1].xyzw[CHAN_Y].f[1] <= 0.0f) { - mach->LoopMask &= ~0x2; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { - mach->LoopMask &= ~0x4; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { - mach->LoopMask &= ~0x8; - } - /* TODO: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); @@ -3295,56 +3264,6 @@ exec_instruction( mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; - case TGSI_OPCODE_ENDFOR: - assert(mach->LoopCounterStackTop > 0); - micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - /* update LoopMask */ - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { - mach->LoopMask &= ~0x1; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { - mach->LoopMask &= ~0x2; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { - mach->LoopMask &= ~0x4; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { - mach->LoopMask &= ~0x8; - } - micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); - assert(mach->LoopLabelStackTop > 0); - inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; - STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); - /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - UPDATE_EXEC_MASK(mach); - if (mach->ExecMask) 
{ - /* repeat loop: jump to instruction just past BGNLOOP */ - assert(mach->LoopLabelStackTop > 0); - *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; - } - else { - /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop ContMask */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->LoopLabelStackTop > 0); - --mach->LoopLabelStackTop; - assert(mach->LoopCounterStackTop > 0); - --mach->LoopCounterStackTop; - - mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index de0e09cdba..cfa2f631bd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -106,12 +106,12 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL }, { 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, { 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, - { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR }, - { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP }, + { 1, 1, 0, 0, 0, 1, "", 75 }, /* removed */ + { 0, 1, 0, 0, 0, 1, "", 76 }, /* removed */ { 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE }, { 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, - { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP }, + { 1, 0, 0, 0, 1, 0, "", 79 }, /* removed */ + { 0, 0, 0, 0, 1, 0, "", 80 }, /* removed */ { 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, { 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, { 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index e4af15c156..e472947507 100644 --- 
a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -111,12 +111,8 @@ OP12(DP2) OP12_TEX(TXL) OP00(BRK) OP01_LBL(IF) -OP11(BGNFOR) -OP01(REP) OP00_LBL(ELSE) OP00(ENDIF) -OP10(ENDFOR) -OP00(ENDREP) OP01(PUSHA) OP10(POPA) OP11(CEIL) diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 371f690b29..76b7564cc3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -346,25 +346,6 @@ iter_instruction( } } - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_BGNFOR: - case TGSI_OPCODE_ENDFOR: - if (inst->Dst[0].Register.File != TGSI_FILE_LOOP || - inst->Dst[0].Register.Index != 0) { - report_error(ctx, "Destination register must be LOOP[0]"); - } - break; - } - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_BGNFOR: - if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT && - inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) { - report_error(ctx, "Source register file must be either CONST or IMM"); - } - break; - } - ctx->num_instructions++; return TRUE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index a85cc4659e..1071298b49 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2533,14 +2533,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_BGNFOR: - return 0; - break; - - case TGSI_OPCODE_REP: - return 0; - break; - case TGSI_OPCODE_ELSE: return 0; break; @@ -2549,14 +2541,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_ENDFOR: - return 0; - break; - - case TGSI_OPCODE_ENDREP: - return 0; - break; - case TGSI_OPCODE_PUSHA: return 0; break; diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 54ed2c1a4b..edd95e0788 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -76,7 +76,8 @@ struct 
translate { void (*set_buffer)( struct translate *, unsigned i, const void *ptr, - unsigned stride ); + unsigned stride, + unsigned max_index ); void (PIPE_CDECL *run_elts)( struct translate *, const unsigned *elts, diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index c3ec9ae3f4..a9272fbb49 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -31,6 +31,7 @@ */ #include "util/u_memory.h" +#include "util/u_math.h" #include "pipe/p_state.h" #include "translate.h" @@ -58,6 +59,7 @@ struct translate_generic { char *input_ptr; unsigned input_stride; + unsigned max_index; } attrib[PIPE_MAX_ATTRIBS]; @@ -588,19 +590,22 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; const char *src; + unsigned index; char *dst = (vert + tg->attrib[attr].output_offset); if (tg->attrib[attr].instance_divisor) { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * - (instance_id / tg->attrib[attr].instance_divisor); + index = instance_id / tg->attrib[attr].instance_divisor; } else { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt; + index = elt; } + index = MIN2(index, tg->attrib[attr].max_index); + + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * index; + tg->attrib[attr].fetch( src, data ); if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", @@ -670,7 +675,8 @@ static void PIPE_CDECL generic_run( struct translate *translate, static void generic_set_buffer( struct translate *translate, unsigned buf, const void *ptr, - unsigned stride ) + unsigned stride, + unsigned max_index ) { struct translate_generic *tg = translate_generic(translate); unsigned i; @@ -680,6 +686,7 @@ static void generic_set_buffer( struct translate *translate, tg->attrib[i].input_ptr = ((char *)ptr + tg->attrib[i].input_offset); 
tg->attrib[i].input_stride = stride; + tg->attrib[i].max_index = max_index; } } } diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index c13e742738..ef3aa674a3 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -61,6 +61,7 @@ typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, struct translate_buffer { const void *base_ptr; unsigned stride; + unsigned max_index; }; struct translate_buffer_varient { @@ -423,6 +424,11 @@ static boolean init_inputs( struct translate_sse *p, } else { x86_mov(p->func, tmp_EAX, elt); } + + /* + * TODO: Respect translate_buffer::max_index. + */ + x86_imul(p->func, tmp_EAX, buf_stride); x86_add(p->func, tmp_EAX, buf_base_ptr); @@ -666,13 +672,15 @@ static boolean build_vertex_emit( struct translate_sse *p, static void translate_sse_set_buffer( struct translate *translate, unsigned buf, const void *ptr, - unsigned stride ) + unsigned stride, + unsigned max_index ) { struct translate_sse *p = (struct translate_sse *)translate; if (buf < p->nr_buffers) { p->buffer[buf].base_ptr = (char *)ptr; p->buffer[buf].stride = stride; + p->buffer[buf].max_index = max_index; } if (0) debug_printf("%s %d/%d: %p %d\n", diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c new file mode 100644 index 0000000000..c7c1e830e0 --- /dev/null +++ b/src/gallium/auxiliary/util/u_caps.c @@ -0,0 +1,245 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_screen.h" +#include "util/u_format.h" +#include "util/u_debug.h" +#include "u_caps.h" + +/** + * Iterates over a list of caps checks as defined in u_caps.h. Should + * all checks pass returns TRUE and out is set to the last element of + * the list (TERMINATE). Should any check fail returns FALSE and set + * out to the index of the start of the first failing check. 
+ */ +boolean +util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out) +{ + int i, tmpi; + float tmpf; + + for (i = 0; list[i];) { + switch(list[i++]) { + case UTIL_CAPS_CHECK_CAP: + if (!screen->get_param(screen, list[i++])) { + *out = i - 2; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_INT: + tmpi = screen->get_param(screen, list[i++]); + if (tmpi < (int)list[i++]) { + *out = i - 3; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_FLOAT: + tmpf = screen->get_paramf(screen, list[i++]); + if (tmpf < (float)list[i++]) { + *out = i - 3; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_FORMAT: + if (!screen->is_format_supported(screen, + list[i++], + PIPE_TEXTURE_2D, + PIPE_BIND_SAMPLER_VIEW, + 0)) { + *out = i - 2; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_UNIMPLEMENTED: + *out = i - 1; + return FALSE; + default: + assert(!"Unsupported check"); + return FALSE; + } + } + + *out = i; + return TRUE; +} + +/** + * Iterates over a list of caps checks as defined in u_caps.h. + * Returns TRUE if all caps checks pass returns FALSE otherwise. + */ +boolean +util_check_caps(struct pipe_screen *screen, const unsigned *list) +{ + int out; + return util_check_caps_out(screen, list, &out); +} + + +/* + * Below follows some demo lists. + * + * None of these lists are exhausting lists of what is + * actually needed to support said API and more here for + * as example on how to uses the above functions. Especially + * for DX10 and DX11 where Gallium is missing features. 
+ */ + +/* DX 9_1 */ +static unsigned caps_dx_9_1[] = { + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 2), + UTIL_CHECK_TERMINATE +}; + +/* DX 9_2 */ +static unsigned caps_dx_9_2[] = { + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_TERMINATE +}; + +/* DX 9_3 */ +static unsigned caps_dx_9_3[] = { + UTIL_CHECK_CAP(SM3), + //UTIL_CHECK_CAP(INSTANCING), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 4), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 13), /* 4096 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_TERMINATE +}; + +/* DX 10 */ +static unsigned caps_dx_10[] = { + UTIL_CHECK_CAP(SM3), + //UTIL_CHECK_CAP(INSTANCING), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 8192 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 8192 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */ + UTIL_CHECK_TERMINATE +}; + +/* DX11 */ +static unsigned caps_dx_11[] = { + UTIL_CHECK_CAP(SM3), + //UTIL_CHECK_CAP(INSTANCING), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 16384 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */ + 
UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 16384 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_FORMAT(B8G8R8A8_UNORM), + UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */ + UTIL_CHECK_TERMINATE +}; + +/* OpenGL 2.1 */ +static unsigned caps_opengl_2_1[] = { + UTIL_CHECK_CAP(GLSL), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_CAP(TWO_SIDED_STENCIL), + UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 2), + UTIL_CHECK_TERMINATE +}; + +/* OpenGL 3.0 */ +/* UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), */ + + +/** + * Demo function which checks against theoretical caps needed for different APIs. + */ +void util_caps_demo_print(struct pipe_screen *screen) +{ + struct { + char* name; + unsigned *list; + } list[] = { + {"DX 9.1", caps_dx_9_1}, + {"DX 9.2", caps_dx_9_2}, + {"DX 9.3", caps_dx_9_3}, + {"DX 10", caps_dx_10}, + {"DX 11", caps_dx_11}, + {"OpenGL 2.1", caps_opengl_2_1}, +/* {"OpenGL 3.0", caps_opengl_3_0},*/ + {NULL, NULL} + }; + int i, out = 0; + + for (i = 0; list[i].name; i++) { + if (util_check_caps_out(screen, list[i].list, &out)) { + debug_printf("%s: %s yes\n", __FUNCTION__, list[i].name); + continue; + } + switch (list[i].list[out]) { + case UTIL_CAPS_CHECK_CAP: + debug_printf("%s: %s no (cap %u not supported)\n", __FUNCTION__, + list[i].name, + list[i].list[out + 1]); + break; + case UTIL_CAPS_CHECK_INT: + debug_printf("%s: %s no (cap %u less then %u)\n", __FUNCTION__, + list[i].name, + list[i].list[out + 1], + list[i].list[out + 2]); + break; + case UTIL_CAPS_CHECK_FLOAT: + debug_printf("%s: %s no (cap %u less then %f)\n", __FUNCTION__, + list[i].name, + list[i].list[out + 1], + (double)(int)list[i].list[out + 2]); + break; + case UTIL_CAPS_CHECK_FORMAT: + debug_printf("%s: %s no (format %s not supported)\n", __FUNCTION__, + list[i].name, + util_format_name(list[i].list[out + 1]) + 12); + break; + case UTIL_CAPS_CHECK_UNIMPLEMENTED: + debug_printf("%s: %s no (not implemented in gallium 
or state tracker)\n", + __FUNCTION__, list[i].name); + break; + default: + assert(!"Unsupported check"); + } + } +} diff --git a/src/gallium/auxiliary/util/u_caps.h b/src/gallium/auxiliary/util/u_caps.h new file mode 100644 index 0000000000..b1074f9eb2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_caps.h @@ -0,0 +1,67 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef U_CAPS_H +#define U_CAPS_H + +#include "pipe/p_compiler.h" + +struct pipe_screen; + +enum u_caps_check_enum { + UTIL_CAPS_CHECK_TERMINATE = 0, + UTIL_CAPS_CHECK_CAP, + UTIL_CAPS_CHECK_INT, + UTIL_CAPS_CHECK_FLOAT, + UTIL_CAPS_CHECK_FORMAT, + UTIL_CAPS_CHECK_UNIMPLEMENTED, +}; + +#define UTIL_CHECK_CAP(cap) \ + UTIL_CAPS_CHECK_CAP, PIPE_CAP_##cap + +#define UTIL_CHECK_INT(cap, higher) \ + UTIL_CAPS_CHECK_INT, PIPE_CAP_##cap, (unsigned)(higher) + +/* Floats currently lose precision */ +#define UTIL_CHECK_FLOAT(cap, higher) \ + UTIL_CAPS_CHECK_FLOAT, PIPE_CAP_##cap, (unsigned)(int)(higher) + +#define UTIL_CHECK_FORMAT(format) \ + UTIL_CAPS_CHECK_FORMAT, PIPE_FORMAT_##format + +#define UTIL_CHECK_UNIMPLEMENTED \ + UTIL_CAPS_CHECK_UNIMPLEMENTED + +#define UTIL_CHECK_TERMINATE \ + UTIL_CAPS_CHECK_TERMINATE + +boolean util_check_caps(struct pipe_screen *screen, const unsigned *list); +boolean util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out); +void util_caps_demo_print(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index dd044973f9..0de38e791d 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -74,6 +74,24 @@ void debug_print_blob( const char *name, #endif +static boolean +debug_get_option_should_print(void) +{ + static boolean first = TRUE; + static boolean value = FALSE; + + if (!first) + return value; + + /* Oh hey this will call into this function, + * but its cool since we set first to false + */ + first = FALSE; + value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", TRUE); + /* XXX should we print this option? 
Currently it wont */ + return value; +} + const char * debug_get_option(const char *name, const char *dfault) { @@ -82,8 +100,9 @@ debug_get_option(const char *name, const char *dfault) result = os_get_option(name); if(!result) result = dfault; - - debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)"); + + if (debug_get_option_should_print()) + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)"); return result; } @@ -109,7 +128,8 @@ debug_get_bool_option(const char *name, boolean dfault) else result = TRUE; - debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE"); + if (debug_get_option_should_print()) + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE"); return result; } @@ -142,8 +162,9 @@ debug_get_num_option(const char *name, long dfault) } result *= sign; } - - debug_printf("%s: %s = %li\n", __FUNCTION__, name, result); + + if (debug_get_option_should_print()) + debug_printf("%s: %s = %li\n", __FUNCTION__, name, result); return result; } @@ -176,11 +197,12 @@ debug_get_flags_option(const char *name, } } - if (str) { - debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str); - } - else { - debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + if (debug_get_option_should_print()) { + if (str) { + debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str); + } else { + debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + } } return result; diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index b6d0b508e3..e8ff2773e6 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -303,6 +303,45 @@ debug_get_flags_option(const char *name, const struct debug_named_value *flags, unsigned long dfault); +#define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \ +static boolean \ +debug_get_option_ ## sufix (void) \ +{ \ + static boolean first = TRUE; \ + 
static boolean value; \ + if (first) { \ + first = FALSE; \ + value = debug_get_bool_option(name, dfault); \ + } \ + return value; \ +} + +#define DEBUG_GET_ONCE_NUM_OPTION(sufix, name, dfault) \ +static long \ +debug_get_option_ ## sufix (void) \ +{ \ + static boolean first = TRUE; \ + static long value; \ + if (first) { \ + first = FALSE; \ + value = debug_get_num_option(name, dfault); \ + } \ + return value; \ +} + +#define DEBUG_GET_ONCE_FLAGS_OPTION(sufix, name, flags, dfault) \ +static unsigned long \ +debug_get_option_ ## sufix (void) \ +{ \ + static boolean first = TRUE; \ + static unsigned long value; \ + if (first) { \ + first = FALSE; \ + value = debug_get_flags_option(name, flags, dfault); \ + } \ + return value; \ +} + unsigned long debug_memory_begin(void); diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index c134f13e90..2ce643e90c 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -656,12 +656,12 @@ util_dump_transfer(struct os_stream *stream, const struct pipe_transfer *state) util_dump_struct_begin(stream, "pipe_transfer"); util_dump_member(stream, ptr, state, resource); -// util_dump_member(stream, uint, state, box); + /*util_dump_member(stream, uint, state, box);*/ util_dump_member(stream, uint, state, stride); util_dump_member(stream, uint, state, slice_stride); -// util_dump_member(stream, ptr, state, data); + /*util_dump_member(stream, ptr, state, data);*/ util_dump_struct_end(stream); } diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 5e3dc694be..fb6ade5c06 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -192,6 +192,7 @@ struct util_format_description /** * Unpack pixel blocks to R8G8B8A8_UNORM. + * Note: strides are in bytes. * * Only defined for non-depth-stencil formats. 
*/ @@ -202,6 +203,7 @@ struct util_format_description /** * Pack pixel blocks from R8G8B8A8_UNORM. + * Note: strides are in bytes. * * Only defined for non-depth-stencil formats. */ @@ -212,6 +214,7 @@ struct util_format_description /** * Unpack pixel blocks to R32G32B32A32_FLOAT. + * Note: strides are in bytes. * * Only defined for non-depth-stencil formats. */ @@ -222,6 +225,7 @@ struct util_format_description /** * Pack pixel blocks from R32G32B32A32_FLOAT. + * Note: strides are in bytes. * * Only defined for non-depth-stencil formats. */ @@ -242,6 +246,7 @@ struct util_format_description /** * Unpack pixels to Z32_UNORM. + * Note: strides are in bytes. * * Only defined for depth formats. */ @@ -252,6 +257,7 @@ struct util_format_description /** * Pack pixels from Z32_FLOAT. + * Note: strides are in bytes. * * Only defined for depth formats. */ @@ -262,6 +268,7 @@ struct util_format_description /** * Unpack pixels to Z32_FLOAT. + * Note: strides are in bytes. * * Only defined for depth formats. */ @@ -272,6 +279,7 @@ struct util_format_description /** * Pack pixels from Z32_FLOAT. + * Note: strides are in bytes. * * Only defined for depth formats. */ @@ -282,6 +290,7 @@ struct util_format_description /** * Unpack pixels to S8_USCALED. + * Note: strides are in bytes. * * Only defined for stencil formats. */ @@ -292,6 +301,7 @@ struct util_format_description /** * Pack pixels from S8_USCALED. + * Note: strides are in bytes. * * Only defined for stencil formats. 
*/ @@ -322,7 +332,7 @@ util_format_name(enum pipe_format format) assert(desc); if (!desc) { - return "???"; + return "PIPE_FORMAT_???"; } return desc->name; diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c index 79dee2b423..5b279b8fe2 100644 --- a/src/gallium/auxiliary/util/u_format_s3tc.c +++ b/src/gallium/auxiliary/util/u_format_s3tc.c @@ -233,108 +233,80 @@ util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned * Block decompression. */ -void -util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) -{ +static INLINE void +util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height, + util_format_dxtn_fetch_t fetch, + unsigned block_size) +{ + const unsigned bw = 4, bh = 4, comps = 4; unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { + for(y = 0; y < height; y += bh) { const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - util_format_dxt1_rgb_fetch(0, src, i, j, dst); + for(x = 0; x < width; x += bw) { + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps; + fetch(0, src, i, j, dst); } } - src += 8; + src += block_size; } src_row += src_stride; } } void -util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x 
= 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - util_format_dxt1_rgba_fetch(0, src, i, j, dst); - } - } - src += 8; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgb_fetch, 8); } void -util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - util_format_dxt3_rgba_fetch(0, src, i, j, dst); - } - } - src += 16; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgba_fetch, 8); } void -util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - util_format_dxt5_rgba_fetch(0, src, i, j, dst); - } - } - src += 16; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + 
width, height, + util_format_dxt3_rgba_fetch, 16); } void -util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - uint8_t tmp[4]; - util_format_dxt1_rgb_fetch(0, src, i, j, tmp); - dst[0] = ubyte_to_float(tmp[0]); - dst[1] = ubyte_to_float(tmp[1]); - dst[2] = ubyte_to_float(tmp[2]); - dst[3] = 1.0; - } - } - src += 8; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt5_rgba_fetch, 16); } -void -util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +static INLINE void +util_format_dxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height, + util_format_dxtn_fetch_t fetch, + unsigned block_size) { unsigned x, y, i, j; for(y = 0; y < height; y += 4) { @@ -344,65 +316,61 @@ util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, con for(i = 0; i < 4; ++i) { float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; uint8_t tmp[4]; - util_format_dxt1_rgba_fetch(0, src, i, j, tmp); + fetch(0, src, i, j, tmp); dst[0] = ubyte_to_float(tmp[0]); dst[1] = ubyte_to_float(tmp[1]); dst[2] = ubyte_to_float(tmp[2]); dst[3] = ubyte_to_float(tmp[3]); } } - src += 8; + src += block_size; } src_row += src_stride; } } void 
-util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - uint8_t tmp[4]; - util_format_dxt3_rgba_fetch(0, src, i, j, tmp); - dst[0] = ubyte_to_float(tmp[0]); - dst[1] = ubyte_to_float(tmp[1]); - dst[2] = ubyte_to_float(tmp[2]); - dst[3] = ubyte_to_float(tmp[3]); - } - } - src += 16; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgb_fetch, 8); } void -util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - uint8_t tmp[4]; - util_format_dxt5_rgba_fetch(0, src, i, j, tmp); - dst[0] = ubyte_to_float(tmp[0]); - dst[1] = ubyte_to_float(tmp[1]); - dst[2] = ubyte_to_float(tmp[2]); - dst[3] = ubyte_to_float(tmp[3]); - } - } - src += 16; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgba_fetch, 8); +} + +void 
+util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt3_rgba_fetch, 16); +} + +void +util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt5_rgba_fetch, 16); } @@ -411,201 +379,198 @@ util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, con */ void -util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, bytes_per_block = 8; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][3]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][3]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { for(k = 0; k < 3; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*4 + k]; } } } - util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride); - src += 4*4; - dst += 8; + util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } 
} void -util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 8; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride); - src += 4*4; - dst += 8; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void -util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 
4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride); - src += 4*4; - dst += 16; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void -util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride); - src += 4*4; - dst += 16; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } 
void -util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][3]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 3; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0); dst += 8; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void -util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0); dst += 8; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void 
-util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0); dst += 16; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void -util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0); dst += 16; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } diff --git 
a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c index 0be4609a20..668da8c5c2 100644 --- a/src/gallium/auxiliary/util/u_surfaces.c +++ b/src/gallium/auxiliary/util/u_surfaces.c @@ -9,13 +9,13 @@ static unsigned hash(void *key) { - return (unsigned)key; + return (unsigned)(uintptr_t)key; } static int compare(void *key1, void *key2) { - return (unsigned)key1 - (unsigned)key2; + return (unsigned)(uintptr_t)key1 - (unsigned)(uintptr_t)key2; } struct pipe_surface * @@ -67,7 +67,7 @@ util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps) struct pipe_resource *pt = ps->texture; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) { /* or 2D array */ - void* key = (void*)(((ps->zslice + ps->face) << 8) | ps->level); + void* key = (void*)(uintptr_t)(((ps->zslice + ps->face) << 8) | ps->level); util_hash_table_remove(us->u.table, key); } else @@ -105,7 +105,7 @@ util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void ( if(ps) destroy_surface(ps); } - free(us->u.array); + FREE(us->u.array); us->u.array = NULL; } } diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index fe327c302b..f7aa1403d0 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -544,7 +544,7 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_Z24_UNORM_S8_USCALED: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); + /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/ for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ @@ -571,7 +571,7 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_S8_USCALED_Z24_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); + 
/*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/ for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py index 59c19ed98d..ccc84405c4 100644 --- a/src/gallium/docs/source/conf.py +++ b/src/gallium/docs/source/conf.py @@ -45,9 +45,9 @@ copyright = u'2009, VMWare, X.org, Nouveau' # built documents. # # The short X.Y version. -version = '0.3' +version = '0.4' # The full version, including alpha/beta/rc tags. -release = '0.3' +release = '0.4' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index b6efd1d40c..c5815f8939 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -3,16 +3,19 @@ Screen A screen is an object representing the context-independent part of a device. -Useful Flags ------------- +Flags and enumerations +---------------------- + +XXX some of these don't belong in this section. + .. _pipe_cap: -PIPE_CAP -^^^^^^^^ +PIPE_CAP_* +^^^^^^^^^^ -Pipe capabilities help expose hardware functionality not explicitly required -by Gallium. For floating-point values, use :ref:`get_paramf`, and for boolean +Capability queries return information about the features and limits of the +driver/GPU. For floating-point values, use :ref:`get_paramf`, and for boolean or integer values, use :ref:`get_param`. The integer capabilities: @@ -56,6 +59,19 @@ The integer capabilities: to any shader stage using ``set_constant_buffer``. If 0 or 1, the pipe will only permit binding one constant buffer per shader, and the shaders will not permit two-dimensional access to constants. + +If a value greater than 0 is returned, the driver can have multiple +constant buffers bound to shader stages. 
The CONST register file can +be accessed with two-dimensional indices, like in the example below. + +DCL CONST[0][0..7] # declare first 8 vectors of constbuf 0 +DCL CONST[3][0] # declare first vector of constbuf 3 +MOV OUT[0], CONST[0][3] # copy vector 3 of constbuf 0 + +For backwards compatibility, one-dimensional access to CONST register +file is still supported. In that case, the constbuf index is assumed +to be 0. + * ``MAX_CONST_BUFFER_SIZE``: Maximum byte size of a single constant buffer. * ``INDEP_BLEND_ENABLE``: Whether per-rendertarget blend enabling and channel masks are supported. If 0, then the first rendertarget's blend mask is @@ -85,64 +101,55 @@ The floating-point capabilities: * ``GUARD_BAND_LEFT``, ``GUARD_BAND_TOP``, ``GUARD_BAND_RIGHT``, ``GUARD_BAND_BOTTOM``: XXX -XXX Is there a better home for this? vvv - -If 0 is returned, the driver is not aware of multiple constant buffers, -supports binding of only one constant buffer, and does not support -two-dimensional CONST register file access in TGSI shaders. - -If a value greater than 0 is returned, the driver can have multiple -constant buffers bound to shader stages. The CONST register file can -be accessed with two-dimensional indices, like in the example below. - -DCL CONST[0][0..7] # declare first 8 vectors of constbuf 0 -DCL CONST[3][0] # declare first vector of constbuf 3 -MOV OUT[0], CONST[0][3] # copy vector 3 of constbuf 0 -For backwards compatibility, one-dimensional access to CONST register -file is still supported. In that case, the constbuf index is assumed -to be 0. .. _pipe_bind: -PIPE_BIND -^^^^^^^^^ +PIPE_BIND_* +^^^^^^^^^^^ -These flags control resource creation. Resources may be used in different roles +These flags indicate how a resource will be used and are specified at resource +creation time. Resources may be used in different roles during their lifecycle. Bind flags are cumulative and may be combined to create -a resource which can be used as multiple things. 
-Depending on the pipe driver's memory management, depending on these bind flags +a resource which can be used for multiple things. +Depending on the pipe driver's memory management and these bind flags, resources might be created and handled quite differently. -* ``RENDER_TARGET``: A color buffer or pixel buffer which will be rendered to. -* ``DISPLAY_TARGET``: A sharable buffer that can be given to another process. -* ``DEPTH_STENCIL``: A depth (Z) buffer or stencil buffer. Gallium does - not explicitly provide for stencil-only buffers, so any stencil buffer - validated here is implicitly also a depth buffer. -* ``SAMPLER_VIEW``: A texture that may be sampled from in a fragment or vertex - shader. -* ``VERTEX_BUFFER``: A vertex buffer. -* ``INDEX_BUFFER``: An element buffer. -* ``CONSTANT_BUFFER``: A buffer of shader constants. -* ``BLIT_SOURCE``: A blit source, as given to surface_copy. -* ``BLIT_DESTINATION``: A blit destination, as given to surface_copy and surface_fill. -* ``TRANSFER_WRITE``: A transfer object which will be written to. -* ``TRANSFER_READ``: A transfer object which will be read from. -* ``CUSTOM``: -* ``SCANOUT``: A front color buffer or scanout buffer. -* ``SHARED``: +* ``PIPE_BIND_RENDER_TARGET``: A color buffer or pixel buffer which will be + rendered to. Any surface/resource attached to pipe_framebuffer_state::cbufs + must have this flag set. +* ``PIPE_BIND_DEPTH_STENCIL``: A depth (Z) buffer and/or stencil buffer. Any + depth/stencil surface/resource attached to pipe_framebuffer_state::zsbuf must + have this flag set. +* ``PIPE_BIND_DISPLAY_TARGET``: A surface that can be presented to screen. Arguments to + pipe_screen::flush_front_buffer must have this flag set. +* ``PIPE_BIND_SAMPLER_VIEW``: A texture that may be sampled from in a fragment + or vertex shader. +* ``PIPE_BIND_VERTEX_BUFFER``: A vertex buffer. +* ``PIPE_BIND_INDEX_BUFFER``: An vertex index/element buffer. +* ``PIPE_BIND_CONSTANT_BUFFER``: A buffer of shader constants. 
+* ``PIPE_BIND_BLIT_SOURCE``: A blit source, as given to surface_copy. +* ``PIPE_BIND_BLIT_DESTINATION``: A blit destination, as given to surface_copy + and surface_fill. +* ``PIPE_BIND_TRANSFER_WRITE``: A transfer object which will be written to. +* ``PIPE_BIND_TRANSFER_READ``: A transfer object which will be read from. +* ``PIPE_BIND_CUSTOM``: +* ``PIPE_BIND_SCANOUT``: A front color buffer or scanout buffer. +* ``PIPE_BIND_SHARED``: A sharable buffer that can be given to another + process. .. _pipe_usage: -PIPE_USAGE -^^^^^^^^^^ +PIPE_USAGE_* +^^^^^^^^^^^^ + +The PIPE_USAGE enums are hints about the expected usage pattern of a resource. -The PIPE_USAGE enums are hints about the expected lifecycle of a resource. -* ``DEFAULT``: Expect many uploads to the resource, intermixed with draws. -* ``DYNAMIC``: Expect many uploads to the resource, intermixed with draws. -* ``STATIC``: Same as immutable (?) -* ``IMMUTABLE``: Resource will not be changed after first upload. -* ``STREAM``: Upload will be followed by draw, followed by upload, ... +* ``PIPE_USAGE_DEFAULT``: Expect many uploads to the resource, intermixed with draws. +* ``PIPE_USAGE_DYNAMIC``: Expect many uploads to the resource, intermixed with draws. +* ``PIPE_USAGE_STATIC``: Same as immutable (?) +* ``PIPE_USAGE_IMMUTABLE``: Resource will not be changed after first upload. +* ``PIPE_USAGE_STREAM``: Upload will be followed by draw, followed by upload, ... @@ -162,7 +169,7 @@ For example, a compressed format might only be used for POT textures. Methods ------- -XXX moar; got bored +XXX to-do get_name ^^^^^^^^ @@ -204,9 +211,15 @@ and/or front-buffer rendering. is_format_supported ^^^^^^^^^^^^^^^^^^^ -See if a format can be used in a specific manner. +Determine if a resource in the given format can be used in a specific manner. -**tex_usage** is a bitmask of :ref:`PIPE_BIND` flags. 
+**format** the resource format + +**target** one of the PIPE_TEXTURE_x flags + +**bindings** is a bitmask of :ref:`PIPE_BIND` flags. + +**geom_flags** is a bitmask of PIPE_TEXTURE_GEOM_x flags. Returns TRUE if all usages can be satisfied. @@ -214,15 +227,35 @@ Returns TRUE if all usages can be satisfied. .. _resource_create: resource_create -^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^ + +Create a new resource from a template. +The following fields of the pipe_resource must be specified in the template: + +target + +format + +width0 + +height0 + +depth0 + +last_level + +nr_samples + +usage + +bind + +flags + -Given a template of texture setup, create a resource. -The way a resource may be used is specifed by bind flags, :ref:`pipe_bind`. -and hints are used to indicate to the driver what access pattern might be -likely, :ref:`pipe_usage`. resource_destroy -^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^ Destroy a resource. A resource is destroyed if it has no more references. diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index c292cd37d5..e2c8602da0 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -786,33 +786,6 @@ This instruction replicates its result. TBD -.. opcode:: BGNFOR - Begin a For-Loop - - dst.x = floor(src.x) - dst.y = floor(src.y) - dst.z = floor(src.z) - - if (dst.y <= 0) - pc = [matching ENDFOR] + 1 - endif - - Note: The destination must be a loop register. - The source must be a constant register. - -.. note:: - - Considered for cleanup. - -.. note:: - - Considered for removal. - - -.. opcode:: REP - Repeat - - TBD - - .. opcode:: ELSE - Else TBD @@ -823,30 +796,6 @@ This instruction replicates its result. TBD -.. opcode:: ENDFOR - End a For-Loop - - dst.x = dst.x + dst.z - dst.y = dst.y - 1.0 - - if (dst.y > 0) - pc = [matching BGNFOR instruction] + 1 - endif - - Note: The destination must be a loop register. - -.. note:: - - Considered for cleanup. - -.. note:: - - Considered for removal. - -.. 
opcode:: ENDREP - End Repeat - - TBD - - .. opcode:: PUSHA - Push Address Register On Stack push(src.x) diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index d2166a4901..d7788bd9bb 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1622,14 +1622,6 @@ exec_instruction( *pc = -1; break; - case TGSI_OPCODE_REP: - ASSERT (0); - break; - - case TGSI_OPCODE_ENDREP: - ASSERT (0); - break; - case TGSI_OPCODE_PUSHA: ASSERT (0); break; @@ -1743,8 +1735,6 @@ exec_instruction( mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; break; - case TGSI_OPCODE_BGNFOR: - /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); @@ -1753,8 +1743,6 @@ exec_instruction( mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; - case TGSI_OPCODE_ENDFOR: - /* fall-through (for now at least) */ case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ ASSERT(mach->ContStackTop > 0); diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 236c50f4d9..9515cd8938 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -39,7 +39,7 @@ static void failover_destroy( struct pipe_context *pipe ) { struct failover_context *failover = failover_context( pipe ); - free( failover ); + FREE( failover ); } diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index b682ce6750..272e683067 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -29,6 +29,7 @@ */ #include "util/u_inlines.h" +#include "util/u_memory.h" #include "fo_context.h" @@ -53,7 +54,7 @@ static void * failover_create_blend_state( struct pipe_context *pipe, const struct pipe_blend_state *blend ) { - struct fo_state *state = 
malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_blend_state(failover->sw, blend); @@ -85,7 +86,7 @@ failover_delete_blend_state( struct pipe_context *pipe, failover->hw->delete_blend_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } static void @@ -129,7 +130,7 @@ static void * failover_create_depth_stencil_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *templ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_depth_stencil_alpha_state(failover->sw, templ); @@ -161,7 +162,7 @@ failover_delete_depth_stencil_state(struct pipe_context *pipe, failover->hw->delete_depth_stencil_alpha_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } static void @@ -181,7 +182,7 @@ static void * failover_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_fs_state(failover->sw, templ); @@ -212,14 +213,14 @@ failover_delete_fs_state(struct pipe_context *pipe, failover->hw->delete_fs_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } static void * failover_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); 
state->sw_state = failover->sw->create_vs_state(failover->sw, templ); @@ -252,7 +253,7 @@ failover_delete_vs_state(struct pipe_context *pipe, failover->hw->delete_vs_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } @@ -262,7 +263,7 @@ failover_create_vertex_elements_state( struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *velems ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_vertex_elements_state(failover->sw, count, velems); @@ -295,7 +296,7 @@ failover_delete_vertex_elements_state( struct pipe_context *pipe, failover->hw->delete_vertex_elements_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } static void @@ -315,7 +316,7 @@ static void * failover_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *templ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_rasterizer_state(failover->sw, templ); @@ -348,7 +349,7 @@ failover_delete_rasterizer_state(struct pipe_context *pipe, failover->hw->delete_rasterizer_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } @@ -369,7 +370,7 @@ static void * failover_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *templ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_sampler_state(failover->sw, templ); @@ -443,7 +444,7 @@ 
failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) failover->hw->delete_sampler_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } @@ -452,7 +453,7 @@ failover_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_sampler_view *templ) { - struct fo_sampler_view *view = malloc(sizeof(struct fo_sampler_view)); + struct fo_sampler_view *view = MALLOC(sizeof(struct fo_sampler_view)); struct failover_context *failover = failover_context(pipe); view->sw = failover->sw->create_sampler_view(failover->sw, texture, templ); @@ -478,7 +479,7 @@ failover_sampler_view_destroy(struct pipe_context *pipe, failover->hw->sampler_view_destroy(failover->hw, fo_view->hw); pipe_resource_reference(&fo_view->base.texture, NULL); - free(fo_view); + FREE(fo_view); } static void diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 323af16b14..a701de33f5 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -168,7 +168,7 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) if (sz == 0) { if (brw->curbe.last_buf) { - free(brw->curbe.last_buf); + FREE(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c index 7d212e5c24..ce5ed0a9ed 100644 --- a/src/gallium/drivers/i965/brw_state_batch.c +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -84,8 +84,8 @@ void brw_clear_batch_cache( struct brw_context *brw ) while (item) { struct brw_cached_batch_item *next = item->next; - free((void *)item->header); - free(item); + FREE((void *)item->header); + FREE(item); item = next; } diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c index 6c47415cac..bd8b9174a8 100644 --- a/src/gallium/drivers/i965/intel_decode.c +++ 
b/src/gallium/drivers/i965/intel_decode.c @@ -40,6 +40,7 @@ #include <stdint.h> #include <string.h> +#include "util/u_string.h" #include "intel_decode.h" /*#include "intel_chipset.h"*/ @@ -478,7 +479,7 @@ i915_get_instruction_src0(const uint32_t *data, int i, char *srcname) char swizzle[100]; i915_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname); - sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + util_snprintf(swizzle, sizeof(swizzle), ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); if (strcmp(swizzle, ".xyzw") != 0) strcat(srcname, swizzle); } @@ -496,7 +497,7 @@ i915_get_instruction_src1(const uint32_t *data, int i, char *srcname) char swizzle[100]; i915_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname); - sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + util_snprintf(swizzle, sizeof(swizzle), ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); if (strcmp(swizzle, ".xyzw") != 0) strcat(srcname, swizzle); } @@ -513,7 +514,7 @@ i915_get_instruction_src2(const uint32_t *data, int i, char *srcname) char swizzle[100]; i915_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname); - sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + util_snprintf(swizzle, sizeof(swizzle), ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); if (strcmp(swizzle, ".xyzw") != 0) strcat(srcname, swizzle); } @@ -642,7 +643,7 @@ i915_decode_dcl(const uint32_t *data, uint32_t hw_offset, int i, char *instr_pre switch ((d0 >> 19) & 0x3) { case 1: - sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); + util_snprintf(dcl_mask, sizeof(dcl_mask), ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); if (strcmp(dcl_mask, ".") == 0) fprintf(out, "bad (empty) dcl mask\n"); @@ -976,7 +977,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, if (i + 3 >= count) BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_PROGRAM"); - sprintf(instr_prefix, "PS%03d", 
instr); + util_snprintf(instr_prefix, sizeof(instr_prefix), "PS%03d", instr); i915_decode_instruction(data, hw_offset, i, instr_prefix); i += 3; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 630cdb5e49..0bc8bf2196 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -42,7 +42,7 @@ identity_destroy(struct pipe_context *_pipe) pipe->destroy(pipe); - free(id_pipe); + FREE(id_pipe); } static void @@ -708,7 +708,7 @@ identity_create_sampler_view(struct pipe_context *pipe, struct identity_resource *id_resource = identity_resource(texture); struct pipe_context *pipe_unwrapped = id_pipe->pipe; struct pipe_resource *texture_unwrapped = id_resource->resource; - struct identity_sampler_view *view = malloc(sizeof(struct identity_sampler_view)); + struct identity_sampler_view *view = MALLOC(sizeof(struct identity_sampler_view)); view->sampler_view = pipe_unwrapped->create_sampler_view(pipe_unwrapped, texture_unwrapped, @@ -736,7 +736,7 @@ identity_sampler_view_destroy(struct pipe_context *pipe, view_unwrapped); pipe_resource_reference(&view->texture, NULL); - free(view); + FREE(view); } static struct pipe_transfer * diff --git a/src/gallium/drivers/identity/id_drm.c b/src/gallium/drivers/identity/id_drm.c index d332c36af2..a9d41af18c 100644 --- a/src/gallium/drivers/identity/id_drm.c +++ b/src/gallium/drivers/identity/id_drm.c @@ -68,7 +68,7 @@ identity_drm_destroy(struct drm_api *_api) struct drm_api *api = id_api->api; api->destroy(api); - free(id_api); + FREE(id_api); } struct drm_api * diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 4a3fc036c4..4ea367597e 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -40,7 +40,7 @@ C_SOURCES = \ lp_state_vertex.c \ lp_state_vs.c \ lp_surface.c \ - lp_tex_sample_llvm.c \ + lp_tex_sample.c \ lp_texture.c \ lp_tile_image.c \ 
lp_tile_soa.c @@ -58,8 +58,9 @@ include ../../Makefile.template lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ - -LIBS += $(GL_LIB_DEPS) -L../../auxiliary/ -lgallium libllvmpipe.a +LDFLAGS += $(LLVM_LDFLAGS) +LIBS += $(GL_LIB_DEPS) -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) +LD=g++ $(PROGS): lp_test_main.o libllvmpipe.a diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index b9e9826e2a..2911cf2179 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -60,7 +60,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', - 'lp_tex_sample_llvm.c', + 'lp_tex_sample.c', 'lp_texture.c', 'lp_tile_image.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index f7cf06d8d4..32b80d3a9f 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -97,63 +97,24 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.winsys = screen->winsys; llvmpipe->pipe.screen = screen; llvmpipe->pipe.priv = priv; - llvmpipe->pipe.destroy = llvmpipe_destroy; - - /* state setters */ - llvmpipe->pipe.create_blend_state = llvmpipe_create_blend_state; - llvmpipe->pipe.bind_blend_state = llvmpipe_bind_blend_state; - llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; - - llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; - llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; - llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states; - llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; - - llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; - 
llvmpipe->pipe.bind_depth_stencil_alpha_state = llvmpipe_bind_depth_stencil_state; - llvmpipe->pipe.delete_depth_stencil_alpha_state = llvmpipe_delete_depth_stencil_state; - - llvmpipe->pipe.create_rasterizer_state = llvmpipe_create_rasterizer_state; - llvmpipe->pipe.bind_rasterizer_state = llvmpipe_bind_rasterizer_state; - llvmpipe->pipe.delete_rasterizer_state = llvmpipe_delete_rasterizer_state; - - llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state; - llvmpipe->pipe.bind_fs_state = llvmpipe_bind_fs_state; - llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state; - - llvmpipe->pipe.create_vs_state = llvmpipe_create_vs_state; - llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state; - llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state; - llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state; - llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state; - llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; - - llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color; - llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref; - llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state; - llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer; + /* Init the pipe context methods */ + llvmpipe->pipe.destroy = llvmpipe_destroy; llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; - llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; - llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_fragment_sampler_views = llvmpipe_set_fragment_sampler_views; - llvmpipe->pipe.set_vertex_sampler_views = llvmpipe_set_vertex_sampler_views; - llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view; - llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy; - llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; - - llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; - 
- llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; - llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; - llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; - llvmpipe->pipe.clear = llvmpipe_clear; llvmpipe->pipe.flush = llvmpipe_flush; - + llvmpipe_init_blend_funcs(llvmpipe); + llvmpipe_init_clip_funcs(llvmpipe); + llvmpipe_init_draw_funcs(llvmpipe); + llvmpipe_init_sampler_funcs(llvmpipe); llvmpipe_init_query_funcs( llvmpipe ); + llvmpipe_init_vertex_funcs(llvmpipe); + llvmpipe_init_fs_funcs(llvmpipe); + llvmpipe_init_vs_funcs(llvmpipe); + llvmpipe_init_rasterizer_funcs(llvmpipe); llvmpipe_init_context_resource_funcs( &llvmpipe->pipe ); + llvmpipe_init_surface_functions(llvmpipe); /* * Create drawing context and plug our rendering stage into it. @@ -186,8 +147,6 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe); #endif - lp_init_surface_functions(llvmpipe); - lp_reset_counters(); return &llvmpipe->pipe; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 4848101ffb..4e597b2479 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -94,9 +94,6 @@ struct llvmpipe_context { /** Vertex format */ struct vertex_info vertex_info; - /** Which vertex shader output slot contains point size */ - int psize_slot; - /** The tiling engine */ struct lp_setup_context *setup; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 0b63e1c889..98780d7631 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -42,20 +42,12 @@ -void -llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count); -} - - /** * Draw vertex arrays, with optional indexing. 
* Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ -void +static void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, @@ -115,7 +107,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, } -void +static void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, @@ -128,3 +120,19 @@ llvmpipe_draw_elements(struct pipe_context *pipe, mode, start, count ); } + +static void +llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count); +} + + +void +llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; + llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; + llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; +} diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 3627dbd759..644b821957 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -96,41 +96,40 @@ llvmpipe_flush( struct pipe_context *pipe, /** * Flush context if necessary. * - * TODO: move this logic to an auxiliary library? + * Returns FALSE if it would have blocked, but do_not_block was set, TRUE + * otherwise. * - * FIXME: We must implement DISCARD/DONTBLOCK/UNSYNCHRONIZED/etc for - * textures to avoid blocking. + * TODO: move this logic to an auxiliary library? 
*/ boolean -llvmpipe_flush_texture(struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned face, - unsigned level, - unsigned flush_flags, - boolean read_only, - boolean cpu_access, - boolean do_not_flush) +llvmpipe_flush_resource(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_block) { unsigned referenced; - referenced = pipe->is_resource_referenced(pipe, texture, face, level); + referenced = pipe->is_resource_referenced(pipe, resource, face, level); if ((referenced & PIPE_REFERENCED_FOR_WRITE) || ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { - if (do_not_flush) - return FALSE; - - /* - * TODO: The semantics of these flush flags are too obtuse. They should - * disappear and the pipe driver should just ensure that all visible - * side-effects happen when they need to happen. - */ - if (referenced & PIPE_REFERENCED_FOR_WRITE) - flush_flags |= PIPE_FLUSH_RENDER_CACHE; + if (resource->target != PIPE_BUFFER) { + /* + * TODO: The semantics of these flush flags are too obtuse. They should + * disappear and the pipe driver should just ensure that all visible + * side-effects happen when they need to happen. 
+ */ + if (referenced & PIPE_REFERENCED_FOR_WRITE) + flush_flags |= PIPE_FLUSH_RENDER_CACHE; - if (referenced & PIPE_REFERENCED_FOR_READ) - flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + if (referenced & PIPE_REFERENCED_FOR_READ) + flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + } if (cpu_access) { /* @@ -139,6 +138,9 @@ llvmpipe_flush_texture(struct pipe_context *pipe, struct pipe_fence_handle *fence = NULL; + if (do_not_block) + return FALSE; + pipe->flush(pipe, flush_flags, &fence); if (fence) { diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index 2375d22b85..7b605681a9 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -33,17 +33,18 @@ struct pipe_context; struct pipe_fence_handle; -void llvmpipe_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence); +void +llvmpipe_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); boolean -llvmpipe_flush_texture(struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned face, - unsigned level, - unsigned flush_flags, - boolean read_only, - boolean cpu_access, - boolean do_not_flush); +llvmpipe_flush_resource(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_block); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 8690941a50..466a2f54fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -171,15 +171,6 @@ lp_jit_screen_cleanup(struct llvmpipe_screen *screen) void lp_jit_screen_init(struct llvmpipe_screen *screen) { - util_cpu_detect(); - -#if 0 - /* For simulating less capable machines */ - util_cpu_caps.has_sse3 = 0; - util_cpu_caps.has_ssse3 = 0; - util_cpu_caps.has_sse4_1 = 0; -#endif - lp_build_init(); screen->module = 
lp_build_module; diff --git a/src/gallium/drivers/llvmpipe/lp_tile_size.h b/src/gallium/drivers/llvmpipe/lp_limits.h index f0b983c063..4102a9df67 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_size.h +++ b/src/gallium/drivers/llvmpipe/lp_limits.h @@ -25,8 +25,12 @@ * **************************************************************************/ -#ifndef LP_TILE_SIZE_H -#define LP_TILE_SIZE_H +/** + * Implementation limits for LLVMpipe driver. + */ + +#ifndef LP_LIMITS_H +#define LP_LIMITS_H /** @@ -36,4 +40,31 @@ #define TILE_SIZE (1 << TILE_ORDER) -#endif +/** + * Max texture sizes + */ +#define LP_MAX_TEXTURE_2D_LEVELS 13 /* 4K x 4K for now */ +#define LP_MAX_TEXTURE_3D_LEVELS 10 /* 512 x 512 x 512 for now */ + + +/** This must be the larger of LP_MAX_TEXTURE_2D/3D_LEVELS */ +#define LP_MAX_TEXTURE_LEVELS LP_MAX_TEXTURE_2D_LEVELS + + +/** + * Max drawing surface size is the max texture size + */ +#define LP_MAX_HEIGHT (1 << (LP_MAX_TEXTURE_LEVELS - 1)) +#define LP_MAX_WIDTH (1 << (LP_MAX_TEXTURE_LEVELS - 1)) + + +#define LP_MAX_THREADS 8 + + +/** + * Max bytes per scene. This may be replaced by a runtime parameter. 
+ */ +#define LP_MAX_SCENE_SIZE (512 * 1024 * 1024) + + +#endif /* LP_LIMITS_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 4046701b85..a00a592f2f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -28,7 +28,6 @@ #include <limits.h> #include "util/u_memory.h" #include "util/u_math.h" -#include "util/u_cpu_detect.h" #include "util/u_surface.h" #include "lp_scene_queue.h" @@ -122,9 +121,11 @@ lp_rast_end( struct lp_rasterizer *rast ) rast->curr_scene = NULL; +#ifdef DEBUG if (0) - printf("Post render scene: tile read: %d tile write: %d\n", - tile_read_count, tile_write_count); + debug_printf("Post render scene: tile unswizzle: %u tile swizzle: %u\n", + lp_tile_unswizzle_count, lp_tile_swizzle_count); +#endif } @@ -869,20 +870,6 @@ create_rast_threads(struct lp_rasterizer *rast) { unsigned i; -#ifdef PIPE_OS_WINDOWS - /* Multithreading not supported on windows until conditions and barriers are - * properly implemented. */ - rast->num_threads = 0; -#else -#ifdef PIPE_OS_EMBEDDED - rast->num_threads = 0; -#else - rast->num_threads = util_cpu_caps.nr_cpus; -#endif - rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads); - rast->num_threads = MIN2(rast->num_threads, MAX_THREADS); -#endif - /* NOTE: if num_threads is zero, we won't use any threads */ for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_init(&rast->tasks[i].work_ready, 0); @@ -895,12 +882,12 @@ create_rast_threads(struct lp_rasterizer *rast) /** - * Create new lp_rasterizer. - * \param empty the queue to put empty scenes on after we've finished - * processing them. + * Create new lp_rasterizer. If num_threads is zero, don't create any + * new threads, do rendering synchronously. 
+ * \param num_threads number of rasterizer threads to create */ struct lp_rasterizer * -lp_rast_create( void ) +lp_rast_create( unsigned num_threads ) { struct lp_rasterizer *rast; unsigned i; @@ -917,6 +904,8 @@ lp_rast_create( void ) task->thread_index = i; } + rast->num_threads = num_threads; + create_rast_threads(rast); /* for synchronizing rasterization threads */ @@ -955,6 +944,8 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) /* for synchronizing rasterization threads */ pipe_barrier_destroy( &rast->barrier ); + lp_scene_queue_destroy(rast->full_scenes); + FREE(rast); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index a0ecb2fc47..e2f6f92677 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -134,7 +134,7 @@ struct lp_rast_triangle { struct lp_rasterizer * -lp_rast_create( void ); +lp_rast_create( unsigned num_threads ); void lp_rast_destroy( struct lp_rasterizer * ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 8bf2b92a6a..5884d12721 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -35,9 +35,7 @@ #include "lp_scene.h" #include "lp_texture.h" #include "lp_tile_soa.h" - - -#define MAX_THREADS 8 /* XXX probably temporary here */ +#include "lp_limits.h" struct lp_rasterizer; @@ -107,16 +105,16 @@ struct lp_rasterizer * (potentially) shared, these empty scenes should be returned to * the context which created them rather than retained here. 
*/ - struct lp_scene_queue *empty_scenes; + /* struct lp_scene_queue *empty_scenes; */ /** The scene currently being rasterized by the threads */ struct lp_scene *curr_scene; /** A task object for each rasterization thread */ - struct lp_rasterizer_task tasks[MAX_THREADS]; + struct lp_rasterizer_task tasks[LP_MAX_THREADS]; unsigned num_threads; - pipe_thread threads[MAX_THREADS]; + pipe_thread threads[LP_MAX_THREADS]; /** For synchronizing the rasterization threads */ pipe_barrier barrier; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 182e7cb230..1482a777ff 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -32,9 +32,20 @@ #include "util/u_surface.h" #include "lp_scene.h" #include "lp_scene_queue.h" -#include "lp_debug.h" +/** List of texture references */ +struct texture_ref { + struct pipe_resource *texture; + struct texture_ref *prev, *next; /**< linked list w/ u_simple_list.h */ +}; + + + +/** + * Create a new scene object. + * \param queue the queue to put newly rendered/emptied scenes into + */ struct lp_scene * lp_scene_create( struct pipe_context *pipe, struct lp_scene_queue *queue ) @@ -57,7 +68,7 @@ lp_scene_create( struct pipe_context *pipe, scene->data.head = scene->data.tail = CALLOC_STRUCT(data_block); - make_empty_list(&scene->textures); + make_empty_list(&scene->resources); pipe_mutex_init(scene->mutex); @@ -66,7 +77,7 @@ lp_scene_create( struct pipe_context *pipe, /** - * Free all data associated with the given scene, and free(scene). + * Free all data associated with the given scene, and the scene itself. 
*/ void lp_scene_destroy(struct lp_scene *scene) @@ -178,15 +189,17 @@ lp_scene_reset(struct lp_scene *scene ) /* Release texture refs */ { - struct texture_ref *ref, *next, *ref_list = &scene->textures; + struct resource_ref *ref, *next, *ref_list = &scene->resources; for (ref = ref_list->next; ref != ref_list; ref = next) { next = next_elem(ref); - pipe_resource_reference(&ref->texture, NULL); + pipe_resource_reference(&ref->resource, NULL); FREE(ref); } make_empty_list(ref_list); } + scene->scene_size = 0; + scene->has_color_clear = FALSE; scene->has_depth_clear = FALSE; } @@ -218,7 +231,10 @@ lp_bin_new_data_block( struct data_block_list *list ) } -/** Return number of bytes used for all bin data within a scene */ +/** + * Return number of bytes used for all bin data within a scene. + * This does not include resources (textures) referenced by the scene. + */ unsigned lp_scene_data_size( const struct lp_scene *scene ) { @@ -247,32 +263,34 @@ lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) /** - * Add a reference to a texture by the scene. + * Add a reference to a resource by the scene. */ void -lp_scene_texture_reference( struct lp_scene *scene, - struct pipe_resource *texture ) +lp_scene_add_resource_reference(struct lp_scene *scene, + struct pipe_resource *resource) { - struct texture_ref *ref = CALLOC_STRUCT(texture_ref); + struct resource_ref *ref = CALLOC_STRUCT(resource_ref); if (ref) { - struct texture_ref *ref_list = &scene->textures; - pipe_resource_reference(&ref->texture, texture); + struct resource_ref *ref_list = &scene->resources; + pipe_resource_reference(&ref->resource, resource); insert_at_tail(ref_list, ref); } + + scene->scene_size += llvmpipe_resource_size(resource); } /** - * Does this scene have a reference to the given texture? + * Does this scene have a reference to the given resource? 
*/ boolean -lp_scene_is_resource_referenced( const struct lp_scene *scene, - const struct pipe_resource *texture ) +lp_scene_is_resource_referenced(const struct lp_scene *scene, + const struct pipe_resource *resource) { - const struct texture_ref *ref_list = &scene->textures; - const struct texture_ref *ref; + const struct resource_ref *ref_list = &scene->resources; + const struct resource_ref *ref; foreach (ref, ref_list) { - if (ref->texture == texture) + if (ref->resource == resource) return TRUE; } return FALSE; @@ -393,61 +411,6 @@ end: } - -/** - * Prepare this scene for the rasterizer. - * Map the framebuffer surfaces. Initialize the 'rast' state. - */ -static boolean -lp_scene_map_buffers( struct lp_scene *scene ) -{ - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - - /* XXX framebuffer surfaces are no longer mapped here */ - /* XXX move all map/unmap stuff into rast module... */ - - return TRUE; -} - - - -/** - * Called after rasterizer as finished rasterizing a scene. - * - * We want to call this from the pipe_context's current thread to - * avoid having to have mutexes on the transfer functions. 
- */ -static void -lp_scene_unmap_buffers( struct lp_scene *scene ) -{ -#if 0 - unsigned i; - - for (i = 0; i < scene->fb.nr_cbufs; i++) { - if (scene->cbuf_map[i]) { - struct pipe_surface *cbuf = scene->fb.cbufs[i]; - llvmpipe_resource_unmap(cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice); - scene->cbuf_map[i] = NULL; - } - } - - if (scene->zsbuf_map) { - struct pipe_surface *zsbuf = scene->fb.zsbuf; - llvmpipe_resource_unmap(zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice); - scene->zsbuf_map = NULL; - } -#endif - - util_unreference_framebuffer_state( &scene->fb ); -} - - void lp_scene_begin_binning( struct lp_scene *scene, struct pipe_framebuffer_state *fb ) { @@ -464,8 +427,7 @@ void lp_scene_begin_binning( struct lp_scene *scene, void lp_scene_rasterize( struct lp_scene *scene, - struct lp_rasterizer *rast, - boolean write_depth ) + struct lp_rasterizer *rast ) { if (0) { unsigned x, y; @@ -479,11 +441,6 @@ void lp_scene_rasterize( struct lp_scene *scene, } } - scene->write_depth = (scene->fb.zsbuf != NULL && - write_depth); - - lp_scene_map_buffers( scene ); - /* Enqueue the scene for rasterization, then immediately wait for * it to finish. */ @@ -494,6 +451,9 @@ void lp_scene_rasterize( struct lp_scene *scene, * transfers become per-context: */ lp_rast_finish( rast ); - lp_scene_unmap_buffers( scene ); + + util_unreference_framebuffer_state( &scene->fb ); + + /* put scene into the empty list */ lp_scene_enqueue( scene->empty_queue, scene ); } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index ac0717db6a..9467cd6f16 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -44,10 +44,8 @@ struct lp_scene_queue; /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. 
*/ -#define MAXHEIGHT 2048 -#define MAXWIDTH 2048 -#define TILES_X (MAXWIDTH / TILE_SIZE) -#define TILES_Y (MAXHEIGHT / TILE_SIZE) +#define TILES_X (LP_MAX_WIDTH / TILE_SIZE) +#define TILES_Y (LP_MAX_HEIGHT / TILE_SIZE) #define CMD_BLOCK_MAX 128 @@ -97,10 +95,10 @@ struct data_block_list { }; -/** List of texture references */ -struct texture_ref { - struct pipe_resource *texture; - struct texture_ref *prev, *next; /**< linked list w/ u_simple_list.h */ +/** List of resource references */ +struct resource_ref { + struct pipe_resource *resource; + struct resource_ref *prev, *next; /**< linked list w/ u_simple_list.h */ }; @@ -118,10 +116,14 @@ struct lp_scene { /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; - /** list of textures referenced by the scene commands */ - struct texture_ref textures; + /** list of resources referenced by the scene commands */ + struct resource_ref resources; + + /** Approx memory used by the scene (in bytes). This includes the + * shared and per-tile bins plus any referenced resources/textures. 
+ */ + unsigned scene_size; - boolean write_depth; boolean has_color_clear; boolean has_depth_clear; @@ -164,11 +166,11 @@ unsigned lp_scene_data_size( const struct lp_scene *scene ); unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ); -void lp_scene_texture_reference( struct lp_scene *scene, - struct pipe_resource *texture ); +void lp_scene_add_resource_reference(struct lp_scene *scene, + struct pipe_resource *resource); -boolean lp_scene_is_resource_referenced( const struct lp_scene *scene, - const struct pipe_resource *texture ); +boolean lp_scene_is_resource_referenced(const struct lp_scene *scene, + const struct pipe_resource *resource ); /** @@ -184,6 +186,8 @@ lp_scene_alloc( struct lp_scene *scene, unsigned size) lp_bin_new_data_block( list ); } + scene->scene_size += size; + { struct data_block *tail = list->tail; ubyte *data = tail->data + tail->used; @@ -206,6 +210,8 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size, lp_bin_new_data_block( list ); } + scene->scene_size += size; + { struct data_block *tail = list->tail; ubyte *data = tail->data + tail->used; @@ -222,6 +228,7 @@ static INLINE void lp_scene_putback_data( struct lp_scene *scene, unsigned size) { struct data_block_list *list = &scene->data; + scene->scene_size -= size; assert(list->tail->used >= size); list->tail->used -= size; } @@ -304,11 +311,18 @@ lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); void lp_scene_rasterize( struct lp_scene *scene, - struct lp_rasterizer *rast, - boolean write_depth ); + struct lp_rasterizer *rast ); void lp_scene_begin_binning( struct lp_scene *scene, struct pipe_framebuffer_state *fb ); + +static INLINE unsigned +lp_scene_get_size(const struct lp_scene *scene) +{ + return scene->scene_size; +} + + #endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 6d309c6b64..111eedc4f2 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,6 +27,8 @@ #include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "util/u_format.h" #include "util/u_format_s3tc.h" #include "pipe/p_defines.h" @@ -39,6 +41,7 @@ #include "lp_context.h" #include "lp_debug.h" #include "lp_public.h" +#include "lp_limits.h" #include "state_tracker/sw_winsys.h" @@ -94,6 +97,8 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; + case PIPE_CAP_SM3: + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: @@ -165,7 +170,7 @@ static boolean llvmpipe_is_format_supported( struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, - unsigned tex_usage, + unsigned bind, unsigned geom_flags ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); @@ -173,7 +178,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, const struct util_format_description *format_desc; format_desc = util_format_description(format); - if(!format_desc) + if (!format_desc) return FALSE; assert(target == PIPE_TEXTURE_1D || @@ -181,45 +186,42 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); - switch(format) { - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return util_format_s3tc_enabled; - default: - break; - } - - if(tex_usage & PIPE_BIND_RENDER_TARGET) { - if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) + if (bind & PIPE_BIND_RENDER_TARGET) { + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) return FALSE; - if(format_desc->block.width != 1 || - format_desc->block.height != 1) + if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return FALSE; - if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && - format_desc->colorspace != 
UTIL_FORMAT_COLORSPACE_SRGB) + if (format_desc->block.width != 1 || + format_desc->block.height != 1) return FALSE; } - if(tex_usage & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - if(!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) + if (bind & PIPE_BIND_DISPLAY_TARGET) { + if(!winsys->is_displaytarget_format_supported(winsys, bind, format)) return FALSE; } - if(tex_usage & PIPE_BIND_DEPTH_STENCIL) { - if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + if (bind & PIPE_BIND_DEPTH_STENCIL) { + if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) + return FALSE; + + if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) return FALSE; /* FIXME: Temporary restriction. See lp_state_fs.c. */ - if(format_desc->block.bits != 32) + if (format_desc->block.bits != 32) return FALSE; } + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + return util_format_s3tc_enabled; + } + + /* + * Everything else should be supported by u_format. + */ return TRUE; } @@ -286,12 +288,26 @@ llvmpipe_create_screen(struct sw_winsys *winsys) screen->base.context_create = llvmpipe_create_context; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; - util_format_s3tc_init(); - llvmpipe_init_screen_resource_funcs(&screen->base); llvmpipe_init_screen_fence_funcs(&screen->base); lp_jit_screen_init(screen); +#ifdef PIPE_OS_WINDOWS + /* Multithreading not supported on windows until conditions and barriers are + * properly implemented. 
*/ + screen->num_threads = 0; +#else +#ifdef PIPE_OS_EMBEDDED + screen->num_threads = 0; +#else + screen->num_threads = util_cpu_caps.nr_cpus; +#endif + screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); + screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS); +#endif + + util_format_s3tc_init(); + return &screen->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h index af25e043cc..4f39432610 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_screen.h @@ -58,6 +58,8 @@ struct llvmpipe_screen LLVMTypeRef context_ptr_type; + unsigned num_threads; + /* Increments whenever textures are modified. Contexts can track * this. */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6be13c60a5..2150956008 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -74,6 +74,26 @@ lp_setup_get_current_scene(struct lp_setup_context *setup) } +/** + * Check if the size of the current scene has exceeded the limit. + * If so, flush/render it. 
+ */ +static void +setup_check_scene_size_and_flush(struct lp_setup_context *setup) +{ + if (setup->scene) { + struct lp_scene *scene = lp_setup_get_current_scene(setup); + unsigned size = lp_scene_get_size(scene); + + if (size > LP_MAX_SCENE_SIZE) { + /*printf("LLVMPIPE: scene size = %u, flushing.\n", size);*/ + set_scene_state( setup, SETUP_FLUSHED ); + /*assert(lp_scene_get_size(scene) == 0);*/ + } + } +} + + static void first_triangle( struct lp_setup_context *setup, const float (*v0)[4], @@ -132,14 +152,11 @@ static void reset_context( struct lp_setup_context *setup ) /** Rasterize all scene's bins */ static void -lp_setup_rasterize_scene( struct lp_setup_context *setup, - boolean write_depth ) +lp_setup_rasterize_scene( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); - lp_scene_rasterize(scene, - setup->rast, - write_depth); + lp_scene_rasterize(scene, setup->rast); reset_context( setup ); @@ -190,7 +207,7 @@ execute_clears( struct lp_setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); begin_binning( setup ); - lp_setup_rasterize_scene( setup, TRUE ); + lp_setup_rasterize_scene( setup ); } @@ -221,7 +238,7 @@ set_scene_state( struct lp_setup_context *setup, if (old_state == SETUP_CLEARED) execute_clears( setup ); else - lp_setup_rasterize_scene( setup, TRUE ); + lp_setup_rasterize_scene( setup ); break; default: @@ -243,7 +260,7 @@ lp_setup_flush( struct lp_setup_context *setup, if (setup->scene) { struct lp_scene *scene = lp_setup_get_current_scene(setup); - union lp_rast_cmd_arg dummy; + union lp_rast_cmd_arg dummy = {0}; if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { @@ -343,20 +360,25 @@ lp_setup_clear( struct lp_setup_context *setup, struct pipe_fence_handle * lp_setup_fence( struct lp_setup_context *setup ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ - struct lp_fence *fence = 
lp_fence_create(rank); + if (setup->num_threads == 0) { + return NULL; + } + else { + struct lp_scene *scene = lp_setup_get_current_scene(setup); + const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ + struct lp_fence *fence = lp_fence_create(rank); - LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); + LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); - set_scene_state( setup, SETUP_ACTIVE ); + set_scene_state( setup, SETUP_ACTIVE ); - /* insert the fence into all command bins */ - lp_scene_bin_everywhere( scene, - lp_rast_fence, - lp_rast_arg_fence(fence) ); + /* insert the fence into all command bins */ + lp_scene_bin_everywhere( scene, + lp_rast_fence, + lp_rast_arg_fence(fence) ); - return (struct pipe_fence_handle *) fence; + return (struct pipe_fence_handle *) fence; + } } @@ -591,10 +613,14 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup, void lp_setup_update_state( struct lp_setup_context *setup ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_scene *scene; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + setup_check_scene_size_and_flush(setup); + + scene = lp_setup_get_current_scene(setup); + assert(setup->fs.current.jit_function); /* Some of the 'draw' pipeline stages may have changed some driver state. 
@@ -715,7 +741,7 @@ lp_setup_update_state( struct lp_setup_context *setup ) */ for (i = 0; i < Elements(setup->fs.current_tex); i++) { if (setup->fs.current_tex[i]) - lp_scene_texture_reference(scene, setup->fs.current_tex[i]); + lp_scene_add_resource_reference(scene, setup->fs.current_tex[i]); } } } @@ -736,6 +762,8 @@ lp_setup_destroy( struct lp_setup_context *setup ) reset_context( setup ); + util_unreference_framebuffer_state(&setup->fb); + for (i = 0; i < Elements(setup->fs.current_tex); i++) { pipe_resource_reference(&setup->fs.current_tex[i], NULL); } @@ -750,6 +778,8 @@ lp_setup_destroy( struct lp_setup_context *setup ) lp_scene_destroy(scene); } + lp_scene_queue_destroy(setup->empty_scenes); + lp_rast_destroy( setup->rast ); FREE( setup ); @@ -765,8 +795,9 @@ struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ) { - unsigned i; + struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); struct lp_setup_context *setup = CALLOC_STRUCT(lp_setup_context); + unsigned i; if (!setup) return NULL; @@ -779,7 +810,8 @@ lp_setup_create( struct pipe_context *pipe, /* XXX: move this to the screen and share between contexts: */ - setup->rast = lp_rast_create(); + setup->num_threads = screen->num_threads; + setup->rast = lp_rast_create(screen->num_threads); if (!setup->rast) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 4594f7597d..584764ce8a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -80,6 +80,7 @@ struct lp_setup_context * create/install this itself now. 
*/ struct draw_stage *vbuf; + unsigned num_threads; struct lp_rasterizer *rast; struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */ struct lp_scene *scene; /**< current scene being built */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index a401275478..5d3122e8ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -440,7 +440,12 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) static void lp_setup_vbuf_destroy(struct vbuf_render *vbr) { - lp_setup_destroy(lp_setup_context(vbr)); + struct lp_setup_context *setup = lp_setup_context(vbr); + if (setup->vertex_buffer) { + align_free(setup->vertex_buffer); + setup->vertex_buffer = NULL; + } + lp_setup_destroy(setup); } diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index dcbff190b6..18143807c9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -31,11 +31,10 @@ #ifndef LP_STATE_H #define LP_STATE_H -#include "gallivm/lp_bld.h" - #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" +#include "gallivm/lp_bld.h" #include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ @@ -85,8 +84,6 @@ struct lp_fragment_shader_variant_key struct lp_fragment_shader_variant { - struct lp_fragment_shader *shader; - struct lp_fragment_shader_variant_key key; LLVMValueRef function[2]; @@ -97,11 +94,7 @@ struct lp_fragment_shader_variant }; -/** - * Subclass of pipe_shader_state (though it doesn't really need to be). - * - * This is starting to look an awful lot like a quad pipeline stage... 
- */ +/** Subclass of pipe_shader_state */ struct lp_fragment_shader { struct pipe_shader_state base; @@ -109,140 +102,58 @@ struct lp_fragment_shader struct tgsi_shader_info info; struct lp_fragment_shader_variant *variants; - - struct lp_fragment_shader_variant *current; }; /** Subclass of pipe_shader_state */ -struct lp_vertex_shader { +struct lp_vertex_shader +{ struct pipe_shader_state shader; struct draw_vertex_shader *draw_data; }; -struct lp_velems_state { + +/** Vertex element state */ +struct lp_velems_state +{ unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; }; -void * -llvmpipe_create_blend_state(struct pipe_context *, - const struct pipe_blend_state *); -void llvmpipe_bind_blend_state(struct pipe_context *, - void *); -void llvmpipe_delete_blend_state(struct pipe_context *, - void *); - -void * -llvmpipe_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); -void llvmpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); void -llvmpipe_bind_vertex_sampler_states(struct pipe_context *, - unsigned num_samplers, - void **samplers); -void llvmpipe_delete_sampler_state(struct pipe_context *, void *); - -void * -llvmpipe_create_depth_stencil_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); -void llvmpipe_bind_depth_stencil_state(struct pipe_context *, void *); -void llvmpipe_delete_depth_stencil_state(struct pipe_context *, void *); - -void * -llvmpipe_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void llvmpipe_bind_rasterizer_state(struct pipe_context *, void *); -void llvmpipe_delete_rasterizer_state(struct pipe_context *, void *); - -void llvmpipe_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - -void llvmpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - -void llvmpipe_set_stencil_ref( struct pipe_context *pipe, - const struct 
pipe_stencil_ref *stencil_ref ); - -void llvmpipe_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void llvmpipe_set_constant_buffer(struct pipe_context *, - uint shader, uint index, - struct pipe_resource *buf); - -void *llvmpipe_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void llvmpipe_bind_fs_state(struct pipe_context *, void *); -void llvmpipe_delete_fs_state(struct pipe_context *, void *); -void *llvmpipe_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void llvmpipe_bind_vs_state(struct pipe_context *, void *); -void llvmpipe_delete_vs_state(struct pipe_context *, void *); - -void *llvmpipe_create_vertex_elements_state(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); -void llvmpipe_bind_vertex_elements_state(struct pipe_context *, void *); -void llvmpipe_delete_vertex_elements_state(struct pipe_context *, void *); - -void llvmpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - -void llvmpipe_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); - -void llvmpipe_set_fragment_sampler_views(struct pipe_context *, - unsigned num, - struct pipe_sampler_view **); +llvmpipe_set_framebuffer_state(struct pipe_context *, + const struct pipe_framebuffer_state *); void -llvmpipe_set_vertex_sampler_views(struct pipe_context *, - unsigned num, - struct pipe_sampler_view **); - -struct pipe_sampler_view * -llvmpipe_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ); +llvmpipe_update_fs(struct llvmpipe_context *lp); void -llvmpipe_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view); +llvmpipe_update_derived(struct llvmpipe_context *llvmpipe); -void llvmpipe_set_viewport_state( struct pipe_context *, - const struct pipe_viewport_state * ); - -void llvmpipe_set_vertex_buffers(struct pipe_context 
*, - unsigned count, - const struct pipe_vertex_buffer *); +void +llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_update_fs(struct llvmpipe_context *lp); +void +llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ); +void +llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe); +void +llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); +void +llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count); void -llvmpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count); +llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe); void -llvmpipe_map_texture_surfaces(struct llvmpipe_context *lp); +llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe); void -llvmpipe_unmap_texture_surfaces(struct llvmpipe_context *lp); +llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index 4ee28473e8..8569507f4e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -40,15 +40,16 @@ #include "lp_state.h" -void * +static void * llvmpipe_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { return mem_dup(blend, sizeof(*blend)); } -void llvmpipe_bind_blend_state( struct pipe_context *pipe, - void *blend ) + +static void +llvmpipe_bind_blend_state(struct pipe_context *pipe, void *blend) { struct 
llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -62,15 +63,17 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe, llvmpipe->dirty |= LP_NEW_BLEND; } -void llvmpipe_delete_blend_state(struct pipe_context *pipe, - void *blend) + +static void +llvmpipe_delete_blend_state(struct pipe_context *pipe, void *blend) { FREE( blend ); } -void llvmpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) +static void +llvmpipe_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -93,14 +96,15 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, */ -void * +static void * llvmpipe_create_depth_stencil_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *depth_stencil) { return mem_dup(depth_stencil, sizeof(*depth_stencil)); } -void + +static void llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) { @@ -116,14 +120,17 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA; } -void + +static void llvmpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) { FREE( depth ); } -void llvmpipe_set_stencil_ref( struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref ) + +static void +llvmpipe_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *stencil_ref) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -142,3 +149,18 @@ void llvmpipe_set_stencil_ref( struct pipe_context *pipe, } +void +llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_blend_state = llvmpipe_create_blend_state; + llvmpipe->pipe.bind_blend_state = llvmpipe_bind_blend_state; + llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; + + llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; + 
llvmpipe->pipe.bind_depth_stencil_alpha_state = llvmpipe_bind_depth_stencil_state; + llvmpipe->pipe.delete_depth_stencil_alpha_state = llvmpipe_delete_depth_stencil_state; + + llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color; + + llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_clip.c b/src/gallium/drivers/llvmpipe/lp_state_clip.c index df68f27acc..32ae079cc1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_clip.c +++ b/src/gallium/drivers/llvmpipe/lp_state_clip.c @@ -32,8 +32,9 @@ #include "draw/draw_context.h" -void llvmpipe_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) +static void +llvmpipe_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -42,8 +43,9 @@ void llvmpipe_set_clip_state( struct pipe_context *pipe, } -void llvmpipe_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) +static void +llvmpipe_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *viewport) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -55,8 +57,9 @@ void llvmpipe_set_viewport_state( struct pipe_context *pipe, } -void llvmpipe_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) +static void +llvmpipe_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *scissor) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -67,8 +70,9 @@ void llvmpipe_set_scissor_state( struct pipe_context *pipe, } -void llvmpipe_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) +static void +llvmpipe_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -77,3 +81,14 @@ void llvmpipe_set_polygon_stipple( 
struct pipe_context *pipe, llvmpipe->poly_stipple = *stipple; /* struct copy */ llvmpipe->dirty |= LP_NEW_STIPPLE; } + + + +void +llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state; + llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; + llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; + llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 59d5a440c0..513e62e39e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -403,9 +403,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef step2_ptr) { const struct tgsi_token *tokens = shader->base.tokens; - LLVMTypeRef elem_type; LLVMTypeRef vec_type; - LLVMTypeRef int_vec_type; LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z = interp->pos[2]; @@ -422,9 +420,7 @@ generate_fs(struct llvmpipe_context *lp, stencil_refs[0] = lp_jit_context_stencil_ref_front_value(builder, context_ptr); stencil_refs[1] = lp_jit_context_stencil_ref_back_value(builder, context_ptr); - elem_type = lp_build_elem_type(type); vec_type = lp_build_vec_type(type); - int_vec_type = lp_build_int_vec_type(type); consts_ptr = lp_jit_context_constants(builder, context_ptr); @@ -474,7 +470,7 @@ generate_fs(struct llvmpipe_context *lp, lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, sampler); + outputs, sampler, &shader->info); for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -546,7 +542,6 @@ generate_blend(const struct pipe_blend_state *blend, struct lp_build_flow_context *flow; struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; - LLVMTypeRef int_vec_type; LLVMValueRef const_ptr; LLVMValueRef con[4]; 
LLVMValueRef dst[4]; @@ -561,7 +556,6 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_mask_begin(&mask_ctx, flow, type, mask); vec_type = lp_build_vec_type(type); - int_vec_type = lp_build_int_vec_type(type); const_ptr = lp_jit_context_blend_color(builder, context_ptr); const_ptr = LLVMBuildBitCast(builder, const_ptr, @@ -624,10 +618,8 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; - LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; - LLVMTypeRef blend_int_vec_type; LLVMTypeRef arg_types[15]; LLVMTypeRef func_type; LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); @@ -682,11 +674,9 @@ generate_fragment(struct llvmpipe_context *lp, */ fs_elem_type = lp_build_elem_type(fs_type); - fs_vec_type = lp_build_vec_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); - blend_int_vec_type = lp_build_int_vec_type(blend_type); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ @@ -945,7 +935,6 @@ generate_variant(struct llvmpipe_context *lp, if(!variant) return NULL; - variant->shader = shader; memcpy(&variant->key, key, sizeof *key); generate_fragment(lp, shader, variant, 0); @@ -959,7 +948,7 @@ generate_variant(struct llvmpipe_context *lp, } -void * +static void * llvmpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -975,11 +964,16 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); + if (LP_DEBUG & DEBUG_TGSI) { + debug_printf("llvmpipe: Create fragment shader %p:\n", (void *) shader); + tgsi_dump(templ->tokens, 0); + } + return shader; } -void +static void llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -995,7 
+989,7 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) } -void +static void llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -1038,7 +1032,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) -void +static void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct pipe_resource *constants) @@ -1112,8 +1106,8 @@ make_variant_key(struct llvmpipe_context *lp, unsigned chan; format_desc = util_format_description(lp->framebuffer.cbufs[i]->format); - assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || - format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); key->blend.rt[i].colormask = lp->blend->rt[i].colormask; @@ -1169,8 +1163,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */ } - shader->current = variant; - /* TODO: put this in the variant */ /* TODO: most of these can be relaxed, in particular the colormask */ opaque = !key.blend.logicop_enable && @@ -1184,7 +1176,19 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) ? 
TRUE : FALSE; lp_setup_set_fs_functions(lp->setup, - shader->current->jit_function[RAST_WHOLE], - shader->current->jit_function[RAST_EDGE_TEST], + variant->jit_function[RAST_WHOLE], + variant->jit_function[RAST_EDGE_TEST], opaque); } + + + +void +llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state; + llvmpipe->pipe.bind_fs_state = llvmpipe_bind_fs_state; + llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state; + + llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 47f65fe72d..622eb47ff4 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -34,7 +34,7 @@ -void * +static void * llvmpipe_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *rast) { @@ -46,7 +46,7 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, -void +static void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -71,16 +71,27 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW, llvmpipe->rasterizer->scissor, llvmpipe->rasterizer->gl_rasterization_rules); + lp_setup_set_flatshade_first( llvmpipe->setup, + llvmpipe->rasterizer->flatshade_first); } llvmpipe->dirty |= LP_NEW_RASTERIZER; } -void llvmpipe_delete_rasterizer_state(struct pipe_context *pipe, - void *rasterizer) +static void +llvmpipe_delete_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) { FREE( rasterizer ); } + +void +llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_rasterizer_state = llvmpipe_create_rasterizer_state; + llvmpipe->pipe.bind_rasterizer_state = llvmpipe_bind_rasterizer_state; + 
llvmpipe->pipe.delete_rasterizer_state = llvmpipe_delete_rasterizer_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 3552ff50ce..55d43368a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -41,7 +41,7 @@ -void * +static void * llvmpipe_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) { @@ -49,7 +49,7 @@ llvmpipe_create_sampler_state(struct pipe_context *pipe, } -void +static void llvmpipe_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { @@ -76,7 +76,7 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, } -void +static void llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, unsigned num_samplers, void **samplers) @@ -104,7 +104,7 @@ llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, } -void +static void llvmpipe_set_fragment_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -133,7 +133,7 @@ llvmpipe_set_fragment_sampler_views(struct pipe_context *pipe, } -void +static void llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -163,7 +163,7 @@ llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe, } -struct pipe_sampler_view * +static struct pipe_sampler_view * llvmpipe_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_sampler_view *templ) @@ -182,7 +182,7 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe, } -void +static void llvmpipe_sampler_view_destroy(struct pipe_context *pipe, struct pipe_sampler_view *view) { @@ -191,7 +191,7 @@ llvmpipe_sampler_view_destroy(struct pipe_context *pipe, } -void +static void llvmpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { @@ -199,4 +199,16 @@ llvmpipe_delete_sampler_state(struct pipe_context *pipe, } - 
+void +llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; + + llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states; + llvmpipe->pipe.set_fragment_sampler_views = llvmpipe_set_fragment_sampler_views; + llvmpipe->pipe.set_vertex_sampler_views = llvmpipe_set_vertex_sampler_views; + llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view; + llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy; + llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 7d86c5750c..63b8f27b39 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -52,8 +52,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb); - assert(fb->width <= MAXWIDTH); - assert(fb->height <= MAXHEIGHT); + assert(fb->width <= LP_MAX_WIDTH); + assert(fb->height <= LP_MAX_HEIGHT); if (changed) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index f6427aa908..113f13db01 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -35,7 +35,7 @@ #include "draw/draw_context.h" -void * +static void * llvmpipe_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *attribs) @@ -50,7 +50,7 @@ llvmpipe_create_vertex_elements_state(struct pipe_context *pipe, return velems; } -void +static void llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe, void *velems) { @@ -65,13 +65,13 @@ llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe, 
draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem); } -void +static void llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) { FREE( velems ); } -void +static void llvmpipe_set_vertex_buffers(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_buffer *buffers) @@ -87,3 +87,15 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(llvmpipe->draw, count, buffers); } + + + +void +llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state; + llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state; + llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; + + llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 884e3878e6..f2d8808990 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -28,15 +28,17 @@ #include "pipe/p_defines.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "util/u_memory.h" #include "draw/draw_context.h" #include "lp_context.h" +#include "lp_debug.h" #include "lp_state.h" -void * +static void * llvmpipe_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -57,6 +59,11 @@ llvmpipe_create_vs_state(struct pipe_context *pipe, if (state->draw_data == NULL) goto fail; + if (LP_DEBUG & DEBUG_TGSI) { + debug_printf("llvmpipe: Create vertex shader %p:\n", (void *) state); + tgsi_dump(templ->tokens, 0); + } + return state; fail: @@ -69,7 +76,7 @@ fail: } -void +static void llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -87,7 +94,7 @@ llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) } -void +static void 
llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -99,3 +106,13 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) FREE( (void *)state->shader.tokens ); FREE( state ); } + + + +void +llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_vs_state = llvmpipe_create_vs_state; + llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state; + llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 1a116989d4..8bd83f576f 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -28,9 +28,9 @@ #include "util/u_rect.h" #include "lp_context.h" #include "lp_flush.h" +#include "lp_limits.h" #include "lp_surface.h" #include "lp_texture.h" -#include "lp_tile_size.h" /** @@ -59,19 +59,19 @@ lp_surface_copy(struct pipe_context *pipe, struct llvmpipe_resource *dst_tex = llvmpipe_resource(dst->texture); const enum pipe_format format = src_tex->base.format; - llvmpipe_flush_texture(pipe, - dst->texture, dst->face, dst->level, - 0, /* flush_flags */ - FALSE, /* read_only */ - FALSE, /* cpu_access */ - FALSE); /* do_not_flush */ + llvmpipe_flush_resource(pipe, + dst->texture, dst->face, dst->level, + 0, /* flush_flags */ + FALSE, /* read_only */ + FALSE, /* cpu_access */ + FALSE); /* do_not_block */ - llvmpipe_flush_texture(pipe, - src->texture, src->face, src->level, - 0, /* flush_flags */ - TRUE, /* read_only */ - FALSE, /* cpu_access */ - FALSE); /* do_not_flush */ + llvmpipe_flush_resource(pipe, + src->texture, src->face, src->level, + 0, /* flush_flags */ + TRUE, /* read_only */ + FALSE, /* cpu_access */ + FALSE); /* do_not_block */ /* printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n", @@ -146,7 +146,7 @@ lp_surface_copy(struct pipe_context *pipe, void -lp_init_surface_functions(struct 
llvmpipe_context *lp) +llvmpipe_init_surface_functions(struct llvmpipe_context *lp) { lp->pipe.surface_copy = lp_surface_copy; lp->pipe.surface_fill = util_surface_fill; diff --git a/src/gallium/drivers/llvmpipe/lp_surface.h b/src/gallium/drivers/llvmpipe/lp_surface.h index 4d78a53c4f..b1b896ebd9 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.h +++ b/src/gallium/drivers/llvmpipe/lp_surface.h @@ -36,7 +36,7 @@ struct llvmpipe_context; extern void -lp_init_surface_functions(struct llvmpipe_context *lp); +llvmpipe_init_surface_functions(struct llvmpipe_context *lp); #endif /* LP_SURFACE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 818f7a9a56..fae7bf3fcf 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -154,7 +154,6 @@ add_blend_test(LLVMModuleRef module, enum vector_mode mode, struct lp_type type) { - LLVMTypeRef ret_type; LLVMTypeRef vec_type; LLVMTypeRef args[4]; LLVMValueRef func; @@ -165,7 +164,6 @@ add_blend_test(LLVMModuleRef module, LLVMBasicBlockRef block; LLVMBuilderRef builder; - ret_type = LLVMInt64Type(); vec_type = lp_build_vec_type(type); args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index 74b7393e4e..74b7393e4e 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index cee170ec83..2f41d620c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -47,7 +47,6 @@ #include "lp_tile_image.h" #include "lp_texture.h" #include "lp_setup.h" -#include "lp_tile_size.h" #include "state_tracker/sw_winsys.h" @@ -55,14 +54,18 @@ static INLINE boolean resource_is_texture(const struct 
pipe_resource *resource) { - const unsigned tex_binds = (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED | - PIPE_BIND_DEPTH_STENCIL | - PIPE_BIND_SAMPLER_VIEW); - const struct llvmpipe_resource *lpr = llvmpipe_resource_const(resource); - - return (lpr->base.bind & tex_binds) ? TRUE : FALSE; + switch (resource->target) { + case PIPE_BUFFER: + return FALSE; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_CUBE: + return TRUE; + default: + assert(0); + return FALSE; + } } @@ -81,7 +84,7 @@ alloc_layout_array(unsigned num_slices, unsigned width, unsigned height) assert(LP_TEX_LAYOUT_NONE == 0); /* calloc'ing LP_TEX_LAYOUT_NONE here */ return (enum lp_texture_layout *) - calloc(num_slices * tx * ty, sizeof(enum lp_texture_layout)); + CALLOC(num_slices * tx * ty, sizeof(enum lp_texture_layout)); } @@ -189,20 +192,20 @@ llvmpipe_resource_create(struct pipe_screen *_screen, assert(lpr->base.bind); - if (lpr->base.bind & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - /* displayable surface */ - if (!llvmpipe_displaytarget_layout(screen, lpr)) - goto fail; - assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); - } - else if (lpr->base.bind & (PIPE_BIND_SAMPLER_VIEW | - PIPE_BIND_DEPTH_STENCIL)) { - /* texture map */ - if (!llvmpipe_texture_layout(screen, lpr)) - goto fail; - assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); + if (resource_is_texture(&lpr->base)) { + if (lpr->base.bind & PIPE_BIND_DISPLAY_TARGET) { + /* displayable surface */ + if (!llvmpipe_displaytarget_layout(screen, lpr)) + goto fail; + assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); + } + else { + /* texture map */ + if (!llvmpipe_texture_layout(screen, lpr)) + goto fail; + assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); + } + assert(lpr->layout[0]); } else { /* other data (vertex buffer, const buffer, etc) */ @@ -217,10 +220,6 @@ llvmpipe_resource_create(struct pipe_screen *_screen, goto fail; } - if 
(resource_is_texture(&lpr->base)) { - assert(lpr->layout[0]); - } - lpr->id = id_counter++; return &lpr->base; @@ -242,6 +241,13 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, /* display target */ struct sw_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, lpr->dt); + + if (lpr->tiled[0].data) { + align_free(lpr->tiled[0].data); + lpr->tiled[0].data = NULL; + } + + FREE(lpr->layout[0]); } else if (resource_is_texture(pt)) { /* regular texture */ @@ -265,7 +271,7 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, /* free layout flag arrays */ for (level = 0; level < Elements(lpr->tiled); level++) { - free(lpr->layout[level]); + FREE(lpr->layout[level]); lpr->layout[level] = NULL; } } @@ -389,10 +395,7 @@ llvmpipe_resource_data(struct pipe_resource *resource) { struct llvmpipe_resource *lpr = llvmpipe_resource(resource); - assert((lpr->base.bind & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED | - PIPE_BIND_SAMPLER_VIEW)) == 0); + assert(!resource_is_texture(resource)); return lpr->data; } @@ -496,6 +499,27 @@ llvmpipe_get_transfer(struct pipe_context *pipe, assert(resource); assert(sr.level <= resource->last_level); + /* + * Transfers, like other pipe operations, must happen in order, so flush the + * context if necessary. + */ + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + boolean read_only = !(usage & PIPE_TRANSFER_WRITE); + boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK); + if (!llvmpipe_flush_resource(pipe, resource, + sr.face, sr.level, + 0, /* flush_flags */ + read_only, + TRUE, /* cpu_access */ + do_not_block)) { + /* + * It would have blocked, but state tracker requested not to. 
+ */ + assert(do_not_block); + return NULL; + } + } + lpr = CALLOC_STRUCT(llvmpipe_transfer); if (lpr) { struct pipe_transfer *pt = &lpr->base; @@ -566,19 +590,6 @@ llvmpipe_transfer_map( struct pipe_context *pipe, lpr = llvmpipe_resource(transfer->resource); format = lpr->base.format; - /* - * Transfers, like other pipe operations, must happen in order, so flush the - * context if necessary. - */ - llvmpipe_flush_texture(pipe, - transfer->resource, - transfer->sr.face, - transfer->sr.level, - 0, /* flush_flags */ - !(transfer->usage & PIPE_TRANSFER_WRITE), /* read_only */ - TRUE, /* cpu_access */ - FALSE); /* do_not_flush */ - map = llvmpipe_resource_map(transfer->resource, transfer->sr.face, transfer->sr.level, @@ -994,14 +1005,16 @@ llvmpipe_get_texture_image(struct llvmpipe_resource *lpr, x * TILE_SIZE, y * TILE_SIZE, TILE_SIZE, TILE_SIZE, lpr->base.format, - lpr->row_stride[level]); + lpr->row_stride[level], + lpr->tiles_per_row[level]); } else { lp_tiled_to_linear(other_data, target_data, x * TILE_SIZE, y * TILE_SIZE, TILE_SIZE, TILE_SIZE, lpr->base.format, - lpr->row_stride[level]); + lpr->row_stride[level], + lpr->tiles_per_row[level]); } } @@ -1090,7 +1103,8 @@ llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpr, if (convert) { lp_tiled_to_linear(tiled_image, linear_image, x, y, TILE_SIZE, TILE_SIZE, lpr->base.format, - lpr->row_stride[level]); + lpr->row_stride[level], + lpr->tiles_per_row[level]); } if (new_layout != cur_layout) @@ -1138,7 +1152,8 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, if (convert) { lp_linear_to_tiled(linear_image, tiled_image, x, y, TILE_SIZE, TILE_SIZE, lpr->base.format, - lpr->row_stride[level]); + lpr->row_stride[level], + lpr->tiles_per_row[level]); } if (new_layout != cur_layout) @@ -1152,6 +1167,27 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, } +/** + * Return size of resource in bytes + */ +unsigned +llvmpipe_resource_size(const struct pipe_resource *resource) +{ + const struct 
llvmpipe_resource *lpr = llvmpipe_resource_const(resource); + unsigned lvl, size = 0; + + for (lvl = 0; lvl <= lpr->base.last_level; lvl++) { + if (lpr->linear[lvl].data) + size += tex_image_size(lpr, lvl, LP_TEX_LAYOUT_LINEAR); + + if (lpr->tiled[lvl].data) + size += tex_image_size(lpr, lvl, LP_TEX_LAYOUT_TILED); + } + + return size; +} + + void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen) { diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 858975bcee..a8d08d6247 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -31,12 +31,7 @@ #include "pipe/p_state.h" #include "util/u_debug.h" - - -#define LP_MAX_TEXTURE_2D_LEVELS 12 /* 2K x 2K for now */ -#define LP_MAX_TEXTURE_3D_LEVELS 10 /* 512 x 512 x 512 for now */ - -#define LP_MAX_TEXTURE_LEVELS LP_MAX_TEXTURE_2D_LEVELS +#include "lp_limits.h" enum lp_texture_usage @@ -189,6 +184,10 @@ void * llvmpipe_resource_data(struct pipe_resource *resource); +unsigned +llvmpipe_resource_size(const struct pipe_resource *resource); + + ubyte * llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr, unsigned face_slice, unsigned level, diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c index 0852150ba7..2b63992dd7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -122,14 +122,15 @@ tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride) /** * Convert a tiled image into a linear image. 
- * \param src_stride source row stride in bytes (bytes per row of tiles) * \param dst_stride dest row stride in bytes */ void lp_tiled_to_linear(const void *src, void *dst, unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, unsigned dst_stride) + enum pipe_format format, + unsigned dst_stride, + unsigned tiles_per_row) { assert(x % TILE_SIZE == 0); assert(y % TILE_SIZE == 0); @@ -191,8 +192,6 @@ lp_tiled_to_linear(const void *src, void *dst, const uint bpp = 4; const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; const uint bytes_per_tile = tile_w * tile_h * bpp; - const uint src_stride = dst_stride * tile_w; - const uint tiles_per_row = src_stride / bytes_per_tile; uint i, j; for (j = 0; j < height; j += tile_h) { @@ -202,7 +201,7 @@ lp_tiled_to_linear(const void *src, void *dst, uint byte_offset = tile_offset * bytes_per_tile; const uint8_t *src_tile = (uint8_t *) src + byte_offset; - lp_tile_write_4ub(format, + lp_tile_unswizzle_4ub(format, src_tile, dst, dst_stride, ii, jj, tile_w, tile_h); @@ -215,13 +214,14 @@ lp_tiled_to_linear(const void *src, void *dst, /** * Convert a linear image into a tiled image. 
* \param src_stride source row stride in bytes - * \param dst_stride dest row stride in bytes (bytes per row of tiles) */ void lp_linear_to_tiled(const void *src, void *dst, unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, unsigned src_stride) + enum pipe_format format, + unsigned src_stride, + unsigned tiles_per_row) { assert(x % TILE_SIZE == 0); assert(y % TILE_SIZE == 0); @@ -281,8 +281,6 @@ lp_linear_to_tiled(const void *src, void *dst, const uint bpp = 4; const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; const uint bytes_per_tile = tile_w * tile_h * bpp; - const uint dst_stride = src_stride * tile_w; - const uint tiles_per_row = dst_stride / bytes_per_tile; uint i, j; for (j = 0; j < height; j += TILE_SIZE) { @@ -292,7 +290,7 @@ lp_linear_to_tiled(const void *src, void *dst, uint byte_offset = tile_offset * bytes_per_tile; uint8_t *dst_tile = (uint8_t *) dst + byte_offset; - lp_tile_read_4ub(format, + lp_tile_swizzle_4ub(format, dst_tile, src, src_stride, ii, jj, tile_w, tile_h); @@ -320,10 +318,10 @@ test_tiled_linear_conversion(void *data, /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/ lp_linear_to_tiled(data, tiled, 0, 0, width, height, format, - stride); + stride, wt); lp_tiled_to_linear(tiled, data, 0, 0, width, height, format, - stride); + stride, wt); free(tiled); } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.h b/src/gallium/drivers/llvmpipe/lp_tile_image.h index d74621925d..8de8efc6c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.h @@ -33,14 +33,18 @@ void lp_tiled_to_linear(const void *src, void *dst, unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, unsigned dst_stride); + enum pipe_format format, + unsigned dst_stride, + unsigned tiles_per_row); void lp_linear_to_tiled(const void *src, void *dst, unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, unsigned 
src_stride); + enum pipe_format format, + unsigned src_stride, + unsigned tiles_per_row); void diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 9d6a88afec..07f71b8411 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -30,7 +30,7 @@ #include "pipe/p_compiler.h" #include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */ -#include "lp_tile_size.h" +#include "lp_limits.h" #ifdef __cplusplus extern "C" { @@ -51,7 +51,10 @@ tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; #define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * NUM_CHANNELS) //1024 -extern int tile_write_count, tile_read_count; +#ifdef DEBUG +extern unsigned lp_tile_unswizzle_count; +extern unsigned lp_tile_swizzle_count; +#endif /** @@ -73,14 +76,14 @@ tile_pixel_offset(unsigned x, unsigned y, unsigned c) void -lp_tile_read_4ub(enum pipe_format format, +lp_tile_swizzle_4ub(enum pipe_format format, uint8_t *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h); void -lp_tile_write_4ub(enum pipe_format format, +lp_tile_unswizzle_4ub(enum pipe_format format, const uint8_t *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 65810b6f8f..5ab63cbac6 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -58,7 +58,7 @@ def is_format_supported(format): channel = format.channels[i] if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT): return False - if channel.type == FLOAT and channel.size not in (32 ,64): + if channel.type == FLOAT and channel.size not in (16, 32 ,64): return False if format.colorspace not in ('rgb', 'srgb'): @@ -75,7 +75,7 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): src_native_type = native_type(format) print 'static 
void' - print 'lp_tile_%s_read_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type) + print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type) print '{' print ' unsigned x, y;' print ' const uint8_t *src_row = src + y0*src_stride;' @@ -193,7 +193,7 @@ def pack_rgba(format, src_channel, r, g, b, a): return expr -def emit_unrolled_write_code(format, src_channel): +def emit_unrolled_unswizzle_code(format, src_channel): '''Emit code for writing a block based on unrolled loops. This is considerably faster than the TILE_PIXEL-based code below. ''' @@ -223,7 +223,7 @@ def emit_unrolled_write_code(format, src_channel): print ' }' -def emit_tile_pixel_write_code(format, src_channel): +def emit_tile_pixel_unswizzle_code(format, src_channel): '''Emit code for writing a block based on the TILE_PIXEL macro.''' dst_native_type = native_type(format) @@ -257,7 +257,7 @@ def emit_tile_pixel_write_code(format, src_channel): value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) print ' *dst_pixel++ = %s;' % value - else: + elif dst_channel.size: print ' ++dst_pixel;' else: assert False @@ -273,7 +273,7 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): name = format.short_name() print 'static void' - print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) + print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) print '{' if format.layout == PLAIN \ and format.colorspace == 'rgb' \ @@ -282,14 +282,14 @@ def generate_format_write(format, src_channel, 
src_native_type, src_suffix): and not format.is_mixed() \ and (format.channels[0].type == UNSIGNED \ or format.channels[1].type == UNSIGNED): - emit_unrolled_write_code(format, src_channel) + emit_unrolled_unswizzle_code(format, src_channel) else: - emit_tile_pixel_write_code(format, src_channel) + emit_tile_pixel_unswizzle_code(format, src_channel) print '}' print -def generate_read(formats, dst_channel, dst_native_type, dst_suffix): +def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): '''Generate the dispatch function to read pixels from any format''' for format in formats: @@ -297,15 +297,17 @@ def generate_read(formats, dst_channel, dst_native_type, dst_suffix): generate_format_read(format, dst_channel, dst_native_type, dst_suffix) print 'void' - print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) + print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) print '{' print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type - print ' tile_read_count += 1;' + print '#ifdef DEBUG' + print ' lp_tile_swizzle_count += 1;' + print '#endif' print ' switch(format) {' for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_read_%s;' % (format.short_name(), dst_suffix) + print ' func = &lp_tile_%s_swizzle_%s;' % (format.short_name(), dst_suffix) print ' break;' print ' default:' print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' @@ -316,7 +318,7 @@ def generate_read(formats, dst_channel, dst_native_type, dst_suffix): print -def generate_write(formats, src_channel, src_native_type, src_suffix): +def generate_unswizzle(formats, 
src_channel, src_native_type, src_suffix): '''Generate the dispatch function to write pixels to any format''' for format in formats: @@ -324,16 +326,18 @@ def generate_write(formats, src_channel, src_native_type, src_suffix): generate_format_write(format, src_channel, src_native_type, src_suffix) print 'void' - print 'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) + print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) print '{' print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type - print ' tile_write_count += 1;' + print '#ifdef DEBUG' + print ' lp_tile_unswizzle_count += 1;' + print '#endif' print ' switch(format) {' for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_write_%s;' % (format.short_name(), src_suffix) + print ' func = &lp_tile_%s_unswizzle_%s;' % (format.short_name(), src_suffix) print ' break;' print ' default:' print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' @@ -360,7 +364,10 @@ def main(): print '#include "util/u_half.h"' print '#include "lp_tile_soa.h"' print - print 'int tile_write_count=0, tile_read_count=0;' + print '#ifdef DEBUG' + print 'unsigned lp_tile_unswizzle_count = 0;' + print 'unsigned lp_tile_swizzle_count = 0;' + print '#endif' print print 'const unsigned char' print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {' @@ -388,8 +395,8 @@ def main(): native_type = 'uint8_t' suffix = '4ub' - generate_read(formats, channel, native_type, suffix) - generate_write(formats, channel, native_type, suffix) + generate_swizzle(formats, channel, native_type, suffix) + generate_unswizzle(formats, 
channel, native_type, suffix) if __name__ == '__main__': diff --git a/src/gallium/drivers/llvmpipe/sp2lp.sh b/src/gallium/drivers/llvmpipe/sp2lp.sh deleted file mode 100755 index c45a81ce3c..0000000000 --- a/src/gallium/drivers/llvmpipe/sp2lp.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh -# -# Port changes from softpipe to llvmpipe. Invoke as -# -# sp2lp.sh <commit> -# -# Note that this will only affect llvmpipe -- you still need to actually -# cherry-pick/merge the softpipe changes themselves if they affect directories -# outside src/gallium/drivers/softpipe - -git format-patch \ - --keep-subject \ - --relative=src/gallium/drivers/softpipe \ - --src-prefix=a/src/gallium/drivers/llvmpipe/ \ - --dst-prefix=b/src/gallium/drivers/llvmpipe/ \ - --stdout "$1^1..$1" \ -| sed \ - -e 's/\<softpipe\>/llvmpipe/g' \ - -e 's/\<sp\>/lp/g' \ - -e 's/\<softpipe_/llvmpipe_/g' \ - -e 's/\<sp_/lp_/g' \ - -e 's/\<SP_/LP_/g' \ - -e 's/\<SOFTPIPE_/LLVMPIPE_/g' \ - -e 's/\<spt\>/lpt/g' \ - -e 's/\<sps\>/lps/g' \ - -e 's/\<spfs\>/lpfs/g' \ - -e 's/\<sptex\>/lptex/g' \ - -e 's/\<setup_\(point\|line\|tri\)\>/llvmpipe_\0/g' \ - -e 's/\<llvmpipe_cached_tile\>/llvmpipe_cached_tex_tile/g' \ - -e 's/_get_cached_tile_tex\>/_get_cached_tex_tile/g' \ - -e 's/\<TILE_SIZE\>/TEX_TILE_SIZE/g' \ - -e 's/\<tile_address\>/tex_tile_address/g' \ - -e 's/\<tile->data\.color\>/tile->color/g' \ -| patch -p1 diff --git a/src/gallium/drivers/nouveau/SConscript b/src/gallium/drivers/nouveau/SConscript new file mode 100644 index 0000000000..fe7af4d2ae --- /dev/null +++ b/src/gallium/drivers/nouveau/SConscript @@ -0,0 +1,11 @@ +Import('*') + +env = env.Clone() + +nouveau = env.ConvenienceLibrary( + target = 'nouveau', + source = [ + 'nouveau_screen.c', + ]) + +Export('nouveau') diff --git a/src/gallium/drivers/nv50/SConscript b/src/gallium/drivers/nv50/SConscript new file mode 100644 index 0000000000..8625f92622 --- /dev/null +++ b/src/gallium/drivers/nv50/SConscript @@ -0,0 +1,26 @@ +Import('*') + +env = 
env.Clone() + +nv50 = env.ConvenienceLibrary( + target = 'nv50', + source = [ + 'nv50_buffer.c', + 'nv50_clear.c', + 'nv50_context.c', + 'nv50_draw.c', + 'nv50_miptree.c', + 'nv50_query.c', + 'nv50_program.c', + 'nv50_resource.c', + 'nv50_screen.c', + 'nv50_state.c', + 'nv50_state_validate.c', + 'nv50_surface.c', + 'nv50_tex.c', + 'nv50_transfer.c', + 'nv50_vbo.c', + 'nv50_push.c', + ]) + +Export('nv50') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b8b6b12120..0156ff95ff 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -3169,15 +3169,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (pc->p->type == PIPE_SHADER_FRAGMENT) nv50_fp_move_results(pc); - /* last insn must be long so it can have the exit bit set */ - if (!is_long(pc->p->exec_tail)) - convert_to_long(pc, pc->p->exec_tail); - else - if (is_immd(pc->p->exec_tail) || + if (!pc->p->exec_tail || + is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail) || is_control_flow(pc->p->exec_tail)) emit_nop(pc); + /* last insn must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ terminate_mbb(pc); @@ -4162,7 +4163,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) struct pipe_transfer *transfer; if (!p->data[0] && p->immd_nr) { - struct nouveau_resource *heap = nv50->screen->immd_heap[0]; + struct nouveau_resource *heap = nv50->screen->immd_heap; if (nouveau_resource_alloc(heap, p->immd_nr, p, &p->data[0])) { while (heap->next && heap->size < p->immd_nr) { @@ -4180,7 +4181,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) p->immd_nr, NV50_CB_PMISC); } - assert(p->param_nr <= 512); + assert(p->param_nr <= 16384); if (p->param_nr) { unsigned cb; diff --git a/src/gallium/drivers/nv50/nv50_screen.c 
b/src/gallium/drivers/nv50/nv50_screen.c index ad17991be9..2dd1042424 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -190,9 +190,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_grobj_free(&screen->tesla); nouveau_grobj_free(&screen->eng2d); nouveau_grobj_free(&screen->m2mf); - nouveau_resource_destroy(&screen->immd_heap[0]); - nouveau_resource_destroy(&screen->parm_heap[0]); - nouveau_resource_destroy(&screen->parm_heap[1]); + nouveau_resource_destroy(&screen->immd_heap); nouveau_screen_fini(&screen->base); FREE(screen); } @@ -242,7 +240,7 @@ nv50_screen_relocs(struct nv50_screen *screen) OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl); OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl); OUT_RELOC (chan, screen->constbuf_parm[i], - ((NV50_CB_PVP + i) << 16) | 0x0800, rl, 0, 0); + ((NV50_CB_PVP + i) << 16) | 0x0000, rl, 0, 0); } } @@ -411,7 +409,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, (NV50_CB_AUX << 16) | 0x0200); for (i = 0; i < 3; i++) { - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (256 * 4) * 4, + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (4096 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { nv50_screen_destroy(pscreen); @@ -420,14 +418,12 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl); OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl); - OUT_RING (chan, ((NV50_CB_PVP + i) << 16) | 0x0800); + /* CB_DEF_SET_SIZE value of 0x0000 means 65536 */ + OUT_RING (chan, ((NV50_CB_PVP + i) << 16) | 0x0000); } - if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) || - nouveau_resource_init(&screen->parm_heap[0], 0, 512) || - nouveau_resource_init(&screen->parm_heap[1], 0, 512)) - { - NOUVEAU_ERR("Error initialising constant buffers.\n"); + if (nouveau_resource_init(&screen->immd_heap, 0, 128)) { + 
NOUVEAU_ERR("Error initialising shader immediates heap.\n"); nv50_screen_destroy(pscreen); return NULL; } diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 40ebbee72e..fbf15a7596 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -20,8 +20,7 @@ struct nv50_screen { struct nouveau_bo *constbuf_misc[1]; struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES]; - struct nouveau_resource *immd_heap[1]; - struct nouveau_resource *parm_heap[PIPE_SHADER_TYPES]; + struct nouveau_resource *immd_heap; struct pipe_resource *strm_vbuf[16]; diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index c2d9e83526..d905d95354 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -31,6 +31,27 @@ #include "util/u_tile.h" #include "util/u_format.h" +/* return TRUE for formats that can be converted among each other by NV50_2D */ +static INLINE boolean +nv50_2d_format_faithful(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8X8_SRGB: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + return TRUE; + default: + return FALSE; + } +} + static INLINE int nv50_format(enum pipe_format format) { @@ -47,9 +68,12 @@ nv50_format(enum pipe_format format) return NV50_2D_DST_FORMAT_R5G6B5_UNORM; case PIPE_FORMAT_B5G5R5A1_UNORM: return NV50_2D_DST_FORMAT_A1R5G5B5_UNORM; + case PIPE_FORMAT_B10G10R10A2_UNORM: + return NV50_2D_DST_FORMAT_A2R10G10B10_UNORM; case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: return NV50_2D_DST_FORMAT_R8_UNORM; case PIPE_FORMAT_R32G32B32A32_FLOAT: return 
NV50_2D_DST_FORMAT_R32G32B32A32_FLOAT; @@ -178,7 +202,9 @@ nv50_surface_copy(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); struct nv50_screen *screen = nv50->screen; - assert(src->format == dest->format); + assert((src->format == dest->format) || + (nv50_2d_format_faithful(src->format) && + nv50_2d_format_faithful(dest->format))); nv50_surface_do_copy(screen, dest, destx, desty, src, srcx, srcy, width, height); diff --git a/src/gallium/drivers/nvfx/SConscript b/src/gallium/drivers/nvfx/SConscript new file mode 100644 index 0000000000..02d931b10e --- /dev/null +++ b/src/gallium/drivers/nvfx/SConscript @@ -0,0 +1,40 @@ +Import('*') + +env = env.Clone() + +env.PrependUnique(delete_existing=1, CPPPATH = [ + '#/src/gallium/drivers', +]) + +nvfx = env.ConvenienceLibrary( + target = 'nvfx', + source = [ + 'nv04_surface_2d.c', + 'nvfx_buffer.c', + 'nvfx_context.c', + 'nvfx_clear.c', + 'nvfx_draw.c', + 'nvfx_fragprog.c', + 'nvfx_fragtex.c', + 'nv30_fragtex.c', + 'nv40_fragtex.c', + 'nvfx_miptree.c', + 'nvfx_query.c', + 'nvfx_resource.c', + 'nvfx_screen.c', + 'nvfx_state.c', + 'nvfx_state_blend.c', + 'nvfx_state_emit.c', + 'nvfx_state_fb.c', + 'nvfx_state_rasterizer.c', + 'nvfx_state_scissor.c', + 'nvfx_state_stipple.c', + 'nvfx_state_viewport.c', + 'nvfx_state_zsa.c', + 'nvfx_surface.c', + 'nvfx_transfer.c', + 'nvfx_vbo.c', + 'nvfx_vertprog.c', + ]) + +Export('nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 4d7b7f181d..520bae5aed 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -8,6 +8,7 @@ #include "nvfx_resource.h" #include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_class.h" #include "nouveau/nouveau_pushbuf.h" #include "nouveau/nouveau_util.h" diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 5a8e00f15a..d3cd6bef96 100644 --- a/src/gallium/drivers/r300/Makefile +++ 
b/src/gallium/drivers/r300/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ r300_emit.c \ r300_flush.c \ r300_fs.c \ + r300_hyperz.c \ r300_query.c \ r300_render.c \ r300_resource.c \ diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 08aec427a1..3921085d76 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -21,6 +21,7 @@ r300 = env.ConvenienceLibrary( 'r300_emit.c', 'r300_flush.c', 'r300_fs.c', + 'r300_hyperz.c', 'r300_query.c', 'r300_render.c', 'r300_resource.c', diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index deaa03e1f6..e84bce0010 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -37,14 +37,27 @@ #include "r300_state_invariant.h" #include "r300_winsys.h" +#include <inttypes.h> + static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); struct r300_query* query, * temp; + struct r300_atom *atom; util_blitter_destroy(r300->blitter); draw_destroy(r300->draw); + /* Print stats, if enabled. */ + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { + fprintf(stderr, "r300: Stats for context %p:\n", r300); + fprintf(stderr, " : Flushes: %" PRIu64 "\n", r300->flush_counter); + foreach(atom, &r300->atom_list) { + fprintf(stderr, " : %s: %" PRIu64 " emits\n", + atom->name, atom->counter); + } + } + /* Free the OQ BO. 
*/ context->screen->resource_destroy(context->screen, r300->oqbo); @@ -63,7 +76,6 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); FREE(r300->textures_state.state); - FREE(r300->vap_output_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300->fs_constants.state); @@ -112,7 +124,6 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(rs_block_state, 0); R300_INIT_ATOM(vertex_stream_state, 0); - R300_INIT_ATOM(vap_output_state, 6); R300_INIT_ATOM(pvs_flush, 2); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); @@ -136,7 +147,6 @@ static void r300_setup_atoms(struct r300_context* r300) r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); - r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 1e4fd9e5ed..e9c8fcdc15 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -45,6 +45,8 @@ struct r300_atom { struct r300_atom *prev, *next; /* Name, for debugging. */ const char* name; + /* Stat counter. */ + uint64_t counter; /* Opaque state. */ void* state; /* Emit the state to the context. 
*/ @@ -117,6 +119,10 @@ struct r300_rs_state { }; struct r300_rs_block { + uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ + uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ + uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ + uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */ uint32_t count; /* R300_RS_COUNT */ uint32_t inst_count; /* R300_RS_INST_COUNT */ @@ -188,12 +194,6 @@ struct r300_vertex_stream_state { unsigned count; }; -struct r300_vap_output_state { - uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ - uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ - uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ -}; - struct r300_viewport_state { float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ @@ -255,6 +255,10 @@ struct r300_texture { /* A pitch for each mip-level */ unsigned pitch[R300_MAX_TEXTURE_LEVELS]; + /* A pitch multiplied by blockwidth as hardware wants + * the number of pixels instead of the number of blocks. */ + unsigned hwpitch[R300_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face based on the texture target */ unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; @@ -375,7 +379,7 @@ struct r300_context { struct r300_atom query_start; /* Rasterizer state. */ struct r300_atom rs_state; - /* RS block state. */ + /* RS block state + VAP (vertex shader) output mapping state. */ struct r300_atom rs_block_state; /* Scissor state. */ struct r300_atom scissor_state; @@ -383,8 +387,6 @@ struct r300_context { struct r300_atom textures_state; /* Vertex stream formatting state. */ struct r300_atom vertex_stream_state; - /* VAP (vertex shader) output mapping state. */ - struct r300_atom vap_output_state; /* Vertex shader. */ struct r300_atom vs_state; /* Vertex shader constant buffer. */ @@ -418,6 +420,9 @@ struct r300_context { struct pipe_viewport_state viewport; + /* Stream locations for SWTCL. 
*/ + int stream_loc_notcl[16]; + /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; /* Whether polygon offset is enabled. */ @@ -435,6 +440,9 @@ struct r300_context { /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; + + /* Stat counter. */ + uint64_t flush_counter; }; /* Convenience cast wrapper. */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 456b2ec7b9..996a4f491e 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -104,6 +104,13 @@ cs_count--; \ } while (0) +#define OUT_CS_TABLE(values, count) do { \ + if (VERY_VERBOSE_REGISTERS) \ + DBG(cs_context_copy, DBG_CS, "r300: writing table of %d dwords\n", count); \ + cs_winsys->write_cs_table(cs_winsys, values, count); \ + cs_count -= count; \ +} while (0) + #define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ "domains (%d, %d, %d)\n", \ @@ -150,6 +157,9 @@ DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { \ + r300->flush_counter++; \ + } \ cs_winsys->flush_cs(cs_winsys); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 6e84bf8246..4c2836f36a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -38,9 +38,11 @@ static struct debug_option debug_options[] = { { "draw", DBG_DRAW, "Draw and emit (for debugging)" }, { "tex", DBG_TEX, "Textures (for debugging)" }, { "fall", DBG_FALL, "Fallbacks (for debugging)" }, + { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, 
+ { "stats", DBG_STATS, "Gather statistics (for lulz)" }, { "all", ~0, "Convenience option that enables all debug flags" }, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 19acdaba62..23bbc6a99c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -83,7 +83,6 @@ void r300_emit_clip_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_clip_state* clip = (struct pipe_clip_state*)state; - int i; CS_LOCALS(r300); if (r300->screen->caps.has_tcl) { @@ -92,12 +91,7 @@ void r300_emit_clip_state(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_UCP_START : R300_PVS_UCP_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); - for (i = 0; i < 6; i++) { - OUT_CS_32F(clip->ucp[i][0]); - OUT_CS_32F(clip->ucp[i][1]); - OUT_CS_32F(clip->ucp[i][2]); - OUT_CS_32F(clip->ucp[i][3]); - } + OUT_CS_TABLE(clip->ucp, 6 * 4); OUT_CS_REG(R300_VAP_CLIP_CNTL, ((1 << clip->nr) - 1) | R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CS; @@ -106,7 +100,6 @@ void r300_emit_clip_state(struct r300_context* r300, OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); END_CS; } - } void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) @@ -244,8 +237,7 @@ void r300_emit_fs(struct r300_context* r300, unsigned size, void *state) OUT_CS_REG(R300_US_CODE_OFFSET, code->code_offset); OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4); - for(i = 0; i < 4; ++i) - OUT_CS(code->code_addr[i]); + OUT_CS_TABLE(code->code_addr, 4); OUT_CS_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); for (i = 0; i < code->alu.length; i++) @@ -265,8 +257,7 @@ void r300_emit_fs(struct r300_context* r300, unsigned size, void *state) if (code->tex.length) { OUT_CS_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); - for(i = 0; i < code->tex.length; ++i) - OUT_CS(code->tex.inst[i]); + OUT_CS_TABLE(code->tex.inst, code->tex.length); } /* Emit immediates. 
*/ @@ -396,10 +387,7 @@ void r500_emit_fs(struct r300_context* r300, unsigned size, void *state) R500_GA_US_VECTOR_INDEX_TYPE_CONST | (i & R500_GA_US_VECTOR_INDEX_MASK)); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4); - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); + OUT_CS_TABLE(data, 4); } } } @@ -424,15 +412,9 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4); for(i = 0; i < count; ++i) { - const float *data; assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL); - data = buf->constants[i]; - - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); } + OUT_CS_TABLE(buf->constants, count * 4); END_CS; } @@ -459,10 +441,7 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo R500_GA_US_VECTOR_INDEX_TYPE_CONST | (i & R500_GA_US_VECTOR_INDEX_MASK)); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4); - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); + OUT_CS_TABLE(data, 4); } } END_CS; @@ -738,13 +717,20 @@ void r300_emit_rs_block_state(struct r300_context* r300, DBG(r300, DBG_DRAW, "r300: RS emit:\n"); BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_CS(rs->vap_vtx_state_cntl); + OUT_CS(rs->vap_vsm_vtx_assm); + OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_CS(rs->vap_out_vtx_fmt[0]); + OUT_CS(rs->vap_out_vtx_fmt[1]); + if (r300->screen->caps.is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); } else { OUT_CS_REG_SEQ(R300_RS_IP_0, count); } + OUT_CS_TABLE(rs->ip, count); for (i = 0; i < count; i++) { - OUT_CS(rs->ip[i]); DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]); } @@ -757,8 +743,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, } else { OUT_CS_REG_SEQ(R300_RS_INST_0, count); } + OUT_CS_TABLE(rs->inst, count); for (i = 0; i 
< count; i++) { - OUT_CS(rs->inst[i]); DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]); } @@ -823,7 +809,7 @@ void r300_emit_textures_state(struct r300_context *r300, END_CS; } -void r300_emit_aos(struct r300_context* r300, unsigned offset) +void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; @@ -832,9 +818,18 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) unsigned packet_size = (aos_count * 3 + 1) / 2; CS_LOCALS(r300); + for (i = 0; i < aos_count; i++) { + if ((vbuf[velem[i].vertex_buffer_index].buffer_offset + velem[i].src_offset) % 4 != 0) { + /* XXX We must align the buffer. */ + assert(0); + fprintf(stderr, "r300: Unaligned vertex buffer offsets aren't supported, aborting..\n"); + abort(); + } + } + BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(aos_count); + OUT_CS(aos_count | (!indexed ? 
R300_VC_FORCE_PREFETCH : 0)); for (i = 0; i < aos_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; @@ -899,39 +894,20 @@ void r300_emit_vertex_stream_state(struct r300_context* r300, BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); + OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count); for (i = 0; i < streams->count; i++) { - OUT_CS(streams->vap_prog_stream_cntl[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, streams->vap_prog_stream_cntl[i]); } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); + OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count); for (i = 0; i < streams->count; i++) { - OUT_CS(streams->vap_prog_stream_cntl_ext[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, streams->vap_prog_stream_cntl_ext[i]); } END_CS; } -void r300_emit_vap_output_state(struct r300_context* r300, - unsigned size, void* state) -{ - struct r300_vap_output_state *vap_out_state = - (struct r300_vap_output_state*)state; - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: VAP emit:\n"); - - BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(vap_out_state->vap_vtx_state_cntl); - OUT_CS(vap_out_state->vap_vsm_vtx_assm); - OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(vap_out_state->vap_out_vtx_fmt[0]); - OUT_CS(vap_out_state->vap_out_vtx_fmt[1]); - END_CS; -} - void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) { CS_LOCALS(r300); @@ -978,9 +954,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length); - for (i = 0; i < code->length; i++) { - OUT_CS(code->body.d[i]); - } + OUT_CS_TABLE(code->body.d, code->length); OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | R300_PVS_NUM_CNTLRS(pvs_num_controllers) | @@ -997,10 +971,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned 
size, void* state) OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4); for (i = imm_first; i < imm_end; i++) { const float *data = vs->code.constants.Constants[i].u.Immediate; - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); + OUT_CS_TABLE(data, 4); } } END_CS; @@ -1009,7 +980,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) void r300_emit_vs_constants(struct r300_context* r300, unsigned size, void *state) { - unsigned i; unsigned count = ((struct r300_vertex_shader*)r300->vs_state.state)->externals_count; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; @@ -1023,13 +993,7 @@ void r300_emit_vs_constants(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_CONST_START : R300_PVS_CONST_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); - for (i = 0; i < count; i++) { - const float *data = buf->constants[i]; - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); - } + OUT_CS_TABLE(buf->constants, count * 4); END_CS; } @@ -1188,6 +1152,11 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) } } + /* emit_query_end is not atomized. 
*/ + dwords += 26; + /* let's reserve some more, just in case */ + dwords += 32; + return dwords; } @@ -1200,6 +1169,9 @@ void r300_emit_dirty_state(struct r300_context* r300) foreach(atom, &r300->atom_list) { if (atom->dirty) { atom->emit(r300, atom->size, atom->state); + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { + atom->counter++; + } atom->dirty = FALSE; } } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 56f7318cdb..3c0edf6fdc 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -29,7 +29,7 @@ struct rX00_fragment_program_code; struct r300_vertex_program_code; -void r300_emit_aos(struct r300_context* r300, unsigned offset); +void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed); void r300_emit_blend_state(struct r300_context* r300, unsigned size, void* state); @@ -81,9 +81,6 @@ void r300_emit_vertex_buffer(struct r300_context* r300); void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_vap_output_state(struct r300_context* r300, - unsigned size, void* state); - void r300_emit_vs_constants(struct r300_context* r300, unsigned size, void *state); diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4d61f63853..88303f074c 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -275,6 +275,14 @@ static void r300_translate_fragment_shader( /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); + /* Shaders with zero instructions are invalid, + * use the dummy shader instead. 
*/ + if (shader->code.code.r500.inst_end == -1) { + rc_destroy(&compiler.Base); + r300_dummy_fragment_shader(r300, shader); + return; + } + if (compiler.Base.Error) { fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader" " instead.\n", compiler.Base.ErrorMsg); diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c new file mode 100644 index 0000000000..b41b6b1508 --- /dev/null +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -0,0 +1,108 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + + +#include "r300_hyperz.h" +#include "r300_context.h" +#include "r300_reg.h" +#include "r300_fs.h" + +/*****************************************************************************/ +/* The ZTOP state */ +/*****************************************************************************/ + +static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when a new depth or stencil value + * can be written and changed. */ + + /* We might optionally check for [Z func: never] and inspect the stencil + * state in a similar fashion, but it's not terribly important. */ + return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || + (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || + ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && + (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); +} + +static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when alpha testing can kill + * a fragment. */ + uint32_t af = dsa->alpha_function; + + return (af & R300_FG_ALPHA_FUNC_ENABLE) && + (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; +} + +static void r300_update_ztop(struct r300_context* r300) +{ + struct r300_ztop_state* ztop_state = + (struct r300_ztop_state*)r300->ztop_state.state; + + /* This is important enough that I felt it warranted a comment. + * + * According to the docs, these are the conditions where ZTOP must be + * disabled: + * 1) Alpha testing enabled + * 2) Texture kill instructions in fragment shader + * 3) Chroma key culling enabled + * 4) W-buffering enabled + * + * The docs claim that for the first three cases, if no ZS writes happen, + * then ZTOP can be used. + * + * (3) will never apply since we do not support chroma-keyed operations. + * (4) will need to be re-examined (and this comment updated) if/when + * Hyper-Z becomes supported. 
+ * + * Additionally, the following conditions require disabled ZTOP: + * 5) Depth writes in fragment shader + * 6) Outstanding occlusion queries + * + * This register causes stalls all the way from SC to CB when changed, + * but it is buffered on-chip so it does not hurt to write it if it has + * not changed. + * + * ~C. + */ + + /* ZS writes */ + if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */ + r300_fs(r300)->shader->info.uses_kill)) { /* (2) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { /* (6) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else { + ztop_state->z_buffer_top = R300_ZTOP_ENABLE; + } + + r300->ztop_state.dirty = TRUE; +} + +void r300_update_hyperz_state(struct r300_context* r300) +{ + r300_update_ztop(r300); +} diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h new file mode 100644 index 0000000000..3df5053b89 --- /dev/null +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -0,0 +1,30 @@ +/* + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_HYPERZ_H +#define R300_HYPERZ_H + +struct r300_context; + +void r300_update_hyperz_state(struct r300_context* r300); + +#endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 239f91443f..675a9317f9 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -3377,7 +3377,7 @@ enum { * the last block is omitted. */ #define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 - +# define R300_VC_FORCE_PREFETCH (1 << 5) # define R300_VBPNTR_SIZE0(x) ((x) >> 2) # define R300_VBPNTR_STRIDE0(x) (((x) >> 2) << 8) # define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 23b61df89c..7c3a7902a4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -41,9 +41,6 @@ #include "r300_render.h" #include "r300_state_derived.h" -/* XXX The DRM rejects VAP_ALT_NUM_VERTICES.. */ -//#define ENABLE_ALT_NUM_VERTS - static uint32_t r300_translate_primitive(unsigned prim) { switch (prim) { @@ -169,6 +166,24 @@ static boolean immd_is_good_idea(struct r300_context *r300, * after resolving fallback issues (e.g. stencil ref two-sided). 
* ****************************************************************************/ +static boolean r500_emit_index_offset(struct r300_context *r300, int indexBias) +{ + CS_LOCALS(r300); + + if (r300->screen->caps.is_r500 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + BEGIN_CS(2); + OUT_CS_REG(R500_VAP_INDEX_OFFSET, + (indexBias & 0xFFFFFF) | (indexBias < 0 ? 1<<24 : 0)); + END_CS; + } else { + if (indexBias) + return FALSE; /* Can't do anything :( */ + } + + return TRUE; +} + void r500_emit_draw_arrays_immediate(struct r300_context *r300, unsigned mode, unsigned start, @@ -220,10 +235,12 @@ void r500_emit_draw_arrays_immediate(struct r300_context *r300, dwords = 9 + count * vertex_size; - r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords); + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 2 + dwords); r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); + r500_emit_index_offset(r300, 0); + BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); @@ -265,23 +282,20 @@ void r500_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) { -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = count > 65535; -#else - boolean alt_num_verts = FALSE; -#endif CS_LOCALS(r300); + if (count >= (1 << 24)) { + fprintf(stderr, "r300: Got a huge number of vertices: %i, " + "refusing to render.\n", count); + return; + } + + r500_emit_index_offset(r300, 0); + + BEGIN_CS(7 + (alt_num_verts ? 
2 : 0)); if (alt_num_verts) { - if (count >= (1 << 24)) { - fprintf(stderr, "r300: Got a huge number of vertices: %i, " - "refusing to render.\n", count); - return; - } - BEGIN_CS(9); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); - } else { - BEGIN_CS(7); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); @@ -307,11 +321,7 @@ void r500_emit_draw_elements(struct r300_context *r300, { uint32_t count_dwords; uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = count > 65535; -#else - boolean alt_num_verts = FALSE; -#endif CS_LOCALS(r300); if (count >= (1 << 24)) { @@ -320,18 +330,20 @@ void r500_emit_draw_elements(struct r300_context *r300, return; } - assert(indexBias == 0); - maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index); DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", count, minIndex, maxIndex); + if (!r500_emit_index_offset(r300, indexBias)) { + fprintf(stderr, "r300: Got a non-zero index bias, " + "refusing to render.\n"); + return; + } + + BEGIN_CS(13 + (alt_num_verts ? 
2 : 0)); if (alt_num_verts) { - BEGIN_CS(15); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); - } else { - BEGIN_CS(13); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); @@ -541,12 +553,9 @@ void r300_draw_range_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct pipe_resource* orgIndexBuffer = indexBuffer; -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536; -#else - boolean alt_num_verts = FALSE; -#endif + count > 65536 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; if (r300->skip_rendering) { @@ -574,7 +583,7 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128); r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); - r300_emit_aos(r300, 0); + r300_emit_aos(r300, 0, TRUE); u_upload_flush(r300->upload_vb); u_upload_flush(r300->upload_ib); @@ -591,11 +600,12 @@ void r300_draw_range_elements(struct pipe_context* pipe, start += short_count; count -= short_count; - /* 16 spare dwords are enough for emit_draw_elements. */ - if (count && r300_reserve_cs_space(r300, 16)) { + /* 16 spare dwords are enough for emit_draw_elements. + * Also reserve some space for emit_query_end. 
*/ + if (count && r300_reserve_cs_space(r300, 74)) { r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); - r300_emit_aos(r300, 0); + r300_emit_aos(r300, 0, TRUE); } } while (count); } @@ -622,12 +632,9 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536; -#else - boolean alt_num_verts = FALSE; -#endif + count > 65536 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; if (r300->skip_rendering) { @@ -650,20 +657,21 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_dirty_state(r300); if (alt_num_verts || count <= 65535) { - r300_emit_aos(r300, start); + r300_emit_aos(r300, start, FALSE); r300->emit_draw_arrays(r300, mode, count); } else { do { short_count = MIN2(count, 65535); - r300_emit_aos(r300, start); + r300_emit_aos(r300, start, FALSE); r300->emit_draw_arrays(r300, mode, short_count); start += short_count; count -= short_count; /* Again, we emit both AOS and draw_arrays so there should be - * at least 128 spare dwords. */ - if (count && r300_reserve_cs_space(r300, 128)) { + * at least 128 spare dwords. + * Also reserve some space for emit_query_end. 
*/ + if (count && r300_reserve_cs_space(r300, 186)) { r300_emit_buffer_validate(r300, TRUE, NULL); r300_emit_dirty_state(r300); } @@ -896,6 +904,8 @@ static void r500_render_draw_arrays(struct vbuf_render* render, DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); + r500_emit_index_offset(r300, 0); + BEGIN_CS(2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | @@ -918,6 +928,8 @@ static void r500_render_draw(struct vbuf_render* render, r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); + r500_emit_index_offset(r300, 0); + BEGIN_CS(dwords); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8fc1d5aa00..c039126703 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "util/u_format.h" +#include "util/u_format_s3tc.h" #include "util/u_memory.h" #include "r300_context.h" @@ -319,6 +320,8 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) r300_init_screen_resource_functions(r300screen); + util_format_s3tc_init(); + return &r300screen->screen; } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 330bd9b36b..735c233c9e 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -71,6 +71,8 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { #define DBG_ANISOHQ 0x0000080 #define DBG_NO_TILING 0x0000100 #define DBG_NO_IMMD 0x0000200 +#define DBG_STATS 0x0000400 +#define DBG_RS 0x0000800 /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 9eb8539a65..d31e7c53f7 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -538,46 +538,12 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, } /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */ -static void r300_fb_update_tiling_flags(struct r300_context *r300, +static void r300_fb_set_tiling_flags(struct r300_context *r300, const struct pipe_framebuffer_state *old_state, const struct pipe_framebuffer_state *new_state) { struct r300_texture *tex; - unsigned i, j, level; - - /* Reset tiling flags for old surfaces to default values. */ - for (i = 0; i < old_state->nr_cbufs; i++) { - for (j = 0; j < new_state->nr_cbufs; j++) { - if (old_state->cbufs[i]->texture == new_state->cbufs[j]->texture) { - break; - } - } - /* If not binding the surface again... 
*/ - if (j != new_state->nr_cbufs) { - continue; - } - - tex = r300_texture(old_state->cbufs[i]->texture); - - if (tex) { - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->macrotile); - } - } - if (old_state->zsbuf && - (!new_state->zsbuf || - old_state->zsbuf->texture != new_state->zsbuf->texture)) { - tex = r300_texture(old_state->zsbuf->texture); - - if (tex) { - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->macrotile); - } - } + unsigned i, level; /* Set tiling flags for new surfaces. */ for (i = 0; i < new_state->nr_cbufs; i++) { @@ -585,7 +551,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, level = new_state->cbufs[i]->level; r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[level], + tex->pitch[0], tex->microtile, tex->mip_macrotile[level]); } @@ -594,7 +560,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, level = new_state->zsbuf->level; r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[level], + tex->pitch[0], tex->microtile, tex->mip_macrotile[level]); } @@ -644,7 +610,8 @@ static void r300->dsa_state.dirty = TRUE; } - r300_fb_update_tiling_flags(r300, r300->fb_state.state, state); + /* The tiling flags are dependent on the surface miplevel, unfortunately. */ + r300_fb_set_tiling_flags(r300, r300->fb_state.state, state); memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); @@ -719,10 +686,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300_mark_fs_code_dirty(r300); r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ - - if (r300->vs_state.state && r300_vertex_shader_setup_wpos(r300)) { - r300->vap_output_state.dirty = TRUE; - } } /* Delete fragment shader state. 
*/ @@ -1072,11 +1035,9 @@ r300_create_sampler_view(struct pipe_context *pipe, swizzle[2] = templ->swizzle_b; swizzle[3] = templ->swizzle_a; - /* XXX Enable swizzles when they become supported. Now we get RGBA - * everywhere. And do testing! */ view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, - 0); /*swizzle);*/ + swizzle); if (r300_screen(pipe->screen)->caps.is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } @@ -1271,6 +1232,7 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, { struct r300_vertex_element_state *velems; unsigned i, size; + enum pipe_format *format; assert(count <= PIPE_MAX_ATTRIBS); velems = CALLOC_STRUCT(r300_vertex_element_state); @@ -1279,21 +1241,88 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); if (r300_screen(pipe->screen)->caps.has_tcl) { - /* Check if the format is aligned to the size of DWORD. */ + r300_vertex_psc(velems); + + /* Check if the format is aligned to the size of DWORD. + * We only care about the blocksizes of the formats since + * swizzles are already set up. */ for (i = 0; i < count; i++) { - size = util_format_get_blocksize(attribs[i].src_format); + format = &velems->velem[i].src_format; + + /* Replace some formats with their aligned counterparts, + * this is OK because we check for aligned strides too. */ + switch (*format) { + /* Align to RGBA8. 
*/ + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + *format = PIPE_FORMAT_R8G8B8A8_UNORM; + continue; + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + *format = PIPE_FORMAT_R8G8B8A8_SNORM; + continue; + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8G8B8_USCALED: + *format = PIPE_FORMAT_R8G8B8A8_USCALED; + continue; + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8G8B8_SSCALED: + *format = PIPE_FORMAT_R8G8B8A8_SSCALED; + continue; + + /* Align to RG16. */ + case PIPE_FORMAT_R16_UNORM: + *format = PIPE_FORMAT_R16G16_UNORM; + continue; + case PIPE_FORMAT_R16_SNORM: + *format = PIPE_FORMAT_R16G16_SNORM; + continue; + case PIPE_FORMAT_R16_USCALED: + *format = PIPE_FORMAT_R16G16_USCALED; + continue; + case PIPE_FORMAT_R16_SSCALED: + *format = PIPE_FORMAT_R16G16_SSCALED; + continue; + case PIPE_FORMAT_R16_FLOAT: + *format = PIPE_FORMAT_R16G16_FLOAT; + continue; + + /* Align to RGBA16. */ + case PIPE_FORMAT_R16G16B16_UNORM: + *format = PIPE_FORMAT_R16G16B16A16_UNORM; + continue; + case PIPE_FORMAT_R16G16B16_SNORM: + *format = PIPE_FORMAT_R16G16B16A16_SNORM; + continue; + case PIPE_FORMAT_R16G16B16_USCALED: + *format = PIPE_FORMAT_R16G16B16A16_USCALED; + continue; + case PIPE_FORMAT_R16G16B16_SSCALED: + *format = PIPE_FORMAT_R16G16B16A16_SSCALED; + continue; + case PIPE_FORMAT_R16G16B16_FLOAT: + *format = PIPE_FORMAT_R16G16B16A16_FLOAT; + continue; + + default:; + } + + size = util_format_get_blocksize(*format); if (size % 4 != 0) { /* XXX Shouldn't we align the format? 
*/ fprintf(stderr, "r300_create_vertex_elements_state: " "Unaligned format %s:%i isn't supported\n", - util_format_name(attribs[i].src_format), size); + util_format_name(*format), size); assert(0); abort(); } } - r300_vertex_psc(velems); } } return velems; @@ -1359,14 +1388,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) } r300->vs_state.state = vs; - // VS output mapping for HWTCL or stream mapping for SWTCL to the RS block - if (r300->fs.state) { - r300_vertex_shader_setup_wpos(r300); - } - memcpy(r300->vap_output_state.state, &vs->vap_out, - sizeof(struct r300_vap_output_state)); - r300->vap_output_state.dirty = TRUE; - /* The majority of the RS block bits is dependent on the vertex shader. */ r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 46c192eae1..e3adace0fa 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -28,6 +28,7 @@ #include "r300_context.h" #include "r300_fs.h" +#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" #include "r300_state.h" @@ -42,6 +43,7 @@ enum r300_rs_swizzle { SWIZ_XYZW = 0, SWIZ_X001, SWIZ_XY01, + SWIZ_0001, }; static void r300_draw_emit_attrib(struct r300_context* r300, @@ -113,12 +115,11 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) static void r300_swtcl_vertex_psc(struct r300_context *r300) { struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state; - struct r300_vertex_shader* vs = r300->vs_state.state; - struct vertex_info* vinfo = &r300->vertex_info; + struct vertex_info* vinfo = &r300->vertex_info; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; - int* vs_output_tab = vs->stream_loc_notcl; + int* vs_output_tab = r300->stream_loc_notcl; /* XXX hax */ memset(vstream, 0, sizeof(struct 
r300_vertex_stream_state)); @@ -169,10 +170,10 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300) } static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, - boolean swizzle_0001) + enum r300_rs_swizzle swiz) { rs->ip[id] |= R300_RS_COL_PTR(ptr); - if (swizzle_0001) { + if (swiz == SWIZ_0001) { rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); } else { rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); @@ -218,10 +219,10 @@ static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) } static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, - boolean swizzle_0001) + enum r300_rs_swizzle swiz) { rs->ip[id] |= R500_RS_COL_PTR(ptr); - if (swizzle_0001) { + if (swiz == SWIZ_0001) { rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); } else { rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); @@ -267,21 +268,29 @@ static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) /* Set up the RS block. * - * This is the part of the chipset that actually does the rasterization - * of vertices into fragments. This is also the part of the chipset that - * locks up if any part of it is even slightly wrong. */ -static void r300_update_rs_block(struct r300_context* r300, - struct r300_shader_semantics* vs_outputs, - struct r300_shader_semantics* fs_inputs) + * This is the part of the chipset that is responsible for linking vertex + * and fragment shaders and stuffed texture coordinates. + * + * The rasterizer reads data from VAP, which produces vertex shader outputs, + * and GA, which produces stuffed texture coordinates. VAP outputs have + * precedence over GA. All outputs must be rasterized otherwise it locks up. + * If there are more outputs rasterized than is set in VAP/GA, it locks up + * too. The funky part is that this info has been pretty much obtained by trial + * and error. 
*/ +static void r300_update_rs_block(struct r300_context *r300) { - struct r300_rs_block rs = { { 0 } }; - int i, col_count = 0, tex_count = 0, fp_offset = 0, count; - void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean); + struct r300_vertex_shader *vs = r300->vs_state.state; + struct r300_shader_semantics *vs_outputs = &vs->outputs; + struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs; + struct r300_rs_block rs = {0}; + int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0; + void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || vs_outputs->bcolor[1] != ATTR_UNUSED; + int *stream_loc_notcl = r300->stream_loc_notcl; if (r300->screen->caps.is_r500) { rX00_rs_col = r500_rs_col; @@ -295,18 +304,39 @@ static void r300_update_rs_block(struct r300_context* r300, rX00_rs_tex_write = r300_rs_tex_write; } - /* Rasterize colors. */ + /* The position is always present in VAP. */ + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + stream_loc_notcl[loc++] = 0; + + /* Set up the point size in VAP. */ + if (vs_outputs->psize != ATTR_UNUSED) { + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + stream_loc_notcl[loc++] = 1; + } + + /* Set up and rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || vs_outputs->color[1] != ATTR_UNUSED) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ - rX00_rs_col(&rs, col_count, i, FALSE); + /* Set up the color in VAP. 
*/ + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + rs.vap_out_vtx_fmt[0] |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; + stream_loc_notcl[loc++] = 2 + i; + + /* Rasterize it. */ + rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->color[i] != ATTR_UNUSED) { rX00_rs_col_write(&rs, col_count, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, + "r300: Rasterized color %i written to FS.\n", i); + } else { + DBG(r300, DBG_RS, "r300: Rasterized color %i unused.\n", i); } col_count++; } else { @@ -314,26 +344,51 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->color[i] != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input color %i unassigned%s.\n", + i); } } } + /* Set up back-face colors. The rasterizer will do the color selection + * automatically. */ + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + stream_loc_notcl[loc++] = 4 + i; + } + } + /* Rasterize texture coordinates. */ - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ + if (!sprite_coord) { + /* Set up the texture coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + } + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, sprite_coord ? SWIZ_XY01 : SWIZ_XYZW); - /* Write it to the FS input register if it's used by the FS. 
*/ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->generic[i] != ATTR_UNUSED) { rX00_rs_tex_write(&rs, tex_count, fp_offset); - if (sprite_coord) - debug_printf("r300: SpriteCoord (generic index %i) is being written to reg %i\n", i, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, + "r300: Rasterized generic %i written to FS%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); + } else { + DBG(r300, DBG_RS, + "r300: Rasterized generic %i unused%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); } tex_count++; } else { @@ -341,20 +396,31 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->generic[i] != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); } } } /* Rasterize fog coordinates. */ - if (vs_outputs->fog != ATTR_UNUSED) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ + if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) { + /* Set up the fog coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->fog != ATTR_UNUSED) { rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, "r300: Rasterized fog written to FS.\n"); + } else { + DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n"); } tex_count++; } else { @@ -362,25 +428,47 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. 
*/ if (fs_inputs->fog != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n"); } } /* Rasterize WPOS. */ - /* If the FS doesn't need it, it's not written by the VS. */ - if (vs_outputs->wpos != ATTR_UNUSED && fs_inputs->wpos != ATTR_UNUSED) { + /* Don't set it in VAP if the FS doesn't need it. */ + if (fs_inputs->wpos != ATTR_UNUSED && tex_count < 8) { + /* Set up the WPOS coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + + /* Write it to the FS input register. */ rX00_rs_tex_write(&rs, tex_count, fp_offset); + DBG(r300, DBG_RS, "r300: Rasterized WPOS written to FS.\n"); + fp_offset++; tex_count++; } + /* Invalidate the rest of the no-TCL (GA) stream locations. */ + for (; loc < 16;) { + stream_loc_notcl[loc++] = -1; + } + /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { - rX00_rs_col(&rs, 0, 0, TRUE); + rX00_rs_col(&rs, 0, 0, SWIZ_0001); col_count++; + + DBG(r300, DBG_RS, "r300: Rasterized color 0 to prevent lockups.\n"); } + DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, " + "generics: %i.\n", col_count, tex_count); + rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; @@ -390,87 +478,8 @@ static void r300_update_rs_block(struct r300_context* r300, /* Now, after all that, see if we actually need to update the state. */ if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); - r300->rs_block_state.size = 5 + count*2; - } -} - -/* Update the shader-dependant states. 
*/ -static void r300_update_derived_shader_state(struct r300_context* r300) -{ - struct r300_vertex_shader* vs = r300->vs_state.state; - - r300_update_rs_block(r300, &vs->outputs, &r300_fs(r300)->shader->inputs); -} - -static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) -{ - /* We are interested only in the cases when a new depth or stencil value - * can be written and changed. */ - - /* We might optionally check for [Z func: never] and inspect the stencil - * state in a similar fashion, but it's not terribly important. */ - return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || - (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || - ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && - (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); -} - -static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) -{ - /* We are interested only in the cases when alpha testing can kill - * a fragment. */ - uint32_t af = dsa->alpha_function; - - return (af & R300_FG_ALPHA_FUNC_ENABLE) && - (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; -} - -static void r300_update_ztop(struct r300_context* r300) -{ - struct r300_ztop_state* ztop_state = - (struct r300_ztop_state*)r300->ztop_state.state; - - /* This is important enough that I felt it warranted a comment. - * - * According to the docs, these are the conditions where ZTOP must be - * disabled: - * 1) Alpha testing enabled - * 2) Texture kill instructions in fragment shader - * 3) Chroma key culling enabled - * 4) W-buffering enabled - * - * The docs claim that for the first three cases, if no ZS writes happen, - * then ZTOP can be used. - * - * (3) will never apply since we do not support chroma-keyed operations. - * (4) will need to be re-examined (and this comment updated) if/when - * Hyper-Z becomes supported. 
- * - * Additionally, the following conditions require disabled ZTOP: - * 5) Depth writes in fragment shader - * 6) Outstanding occlusion queries - * - * This register causes stalls all the way from SC to CB when changed, - * but it is buffered on-chip so it does not hurt to write it if it has - * not changed. - * - * ~C. - */ - - /* ZS writes */ - if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && - (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */ - r300_fs(r300)->shader->info.uses_kill)) { /* (2) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { /* (6) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else { - ztop_state->z_buffer_top = R300_ZTOP_ENABLE; + r300->rs_block_state.size = 11 + count*2; } - - r300->ztop_state.dirty = TRUE; } static void r300_merge_textures_and_samplers(struct r300_context* r300) @@ -568,7 +577,7 @@ void r300_update_derived_state(struct r300_context* r300) } if (r300->rs_block_state.dirty) { - r300_update_derived_shader_state(r300); + r300_update_rs_block(r300); } if (r300->draw) { @@ -578,5 +587,5 @@ void r300_update_derived_state(struct r300_context* r300) r300_swtcl_vertex_psc(r300); } - r300_update_ztop(r300); + r300_update_hyperz_state(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 480d0f7c4a..fcbdb91b67 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -374,6 +374,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); + abort(); } switch (desc->channel[0].type) { @@ -395,6 +396,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { fprintf(stderr, "r300: Bad format %s in %s:%d\n", 
util_format_name(format), __FUNCTION__, __LINE__); assert(0); + abort(); } break; /* Unsigned ints */ @@ -418,12 +420,14 @@ r300_translate_vertex_data_type(enum pipe_format format) { fprintf(stderr, "r300: desc->channel[0].size == %d\n", desc->channel[0].size); assert(0); + abort(); } break; default: fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); + abort(); } if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { @@ -439,6 +443,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); + unsigned i, swizzle = 0; assert(format); @@ -448,11 +453,19 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { return 0; } - return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | - (0xf << R300_WRITE_ENA_SHIFT)); + for (i = 0; i < desc->nr_channels; i++) { + swizzle |= + MIN2(desc->swizzle[i], R300_SWIZZLE_SELECT_FP_ONE) << (3*i); + } + /* Set (0,0,0,1) in unused components. */ + for (; i < 3; i++) { + swizzle |= R300_SWIZZLE_SELECT_FP_ZERO << (3*i); + } + for (; i < 4; i++) { + swizzle |= R300_SWIZZLE_SELECT_FP_ONE << (3*i); + } + + return swizzle | (0xf << R300_WRITE_ENA_SHIFT); } #endif /* R300_STATE_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 64d1d18b45..cd9443fa26 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -43,15 +43,17 @@ void r300_emit_invariant_state(struct r300_context* r300, { CS_LOCALS(r300); + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + /* Subpixel multisampling for AA. 
*/ + BEGIN_CS(4); + OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); + OUT_CS_REG(R300_GB_MSPOS1, 0x6666666); + END_CS; + } + BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 2 : 0)); /*** Graphics Backend (GB) ***/ - /* Subpixel multisampling for AA - * These are commented out because glisse's CS checker doesn't like them. - * I presume these will be re-enabled later. - * OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); - * OUT_CS_REG(R300_GB_MSPOS1, 0x6666666); - */ /* Source of fog depth */ OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 8bebeacf86..69e6a12445 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -34,9 +34,6 @@ #include "r300_screen.h" #include "r300_winsys.h" -/* XXX Enable float textures here. */ -/*#define ENABLE_FLOAT_TEXTURES*/ - #define TILE_WIDTH 0 #define TILE_HEIGHT 1 @@ -74,7 +71,7 @@ static boolean r300_format_is_plain(enum pipe_format format) * The FORMAT specifies how the texture sampler will treat the texture, and * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle) + const unsigned char *swizzle_view) { uint32_t result = 0; const struct util_format_description *desc; @@ -98,6 +95,7 @@ uint32_t r300_translate_texformat(enum pipe_format format, R300_TX_FORMAT_SIGNED_Z, R300_TX_FORMAT_SIGNED_W, }; + unsigned char swizzle[4]; desc = util_format_description(format); @@ -144,25 +142,18 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - /* Add swizzle. */ - if (!swizzle) { - swizzle = desc->swizzle; - } /*else { - if (swizzle[0] != desc->swizzle[0] || - swizzle[1] != desc->swizzle[1] || - swizzle[2] != desc->swizzle[2] || - swizzle[3] != desc->swizzle[3]) - { - const char n[6] = "RGBA01"; - fprintf(stderr, "Got different swizzling! 
Format: %c%c%c%c, " - "View: %c%c%c%c\n", - n[desc->swizzle[0]], n[desc->swizzle[1]], - n[desc->swizzle[2]], n[desc->swizzle[3]], - n[swizzle[0]], n[swizzle[1]], n[swizzle[2]], - n[swizzle[3]]); + /* Get swizzle. */ + if (swizzle_view) { + /* Compose two sets of swizzles. */ + for (i = 0; i < 4; i++) { + swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? + desc->swizzle[swizzle_view[i]] : swizzle_view[i]; } - }*/ + } else { + memcpy(swizzle, desc->swizzle, sizeof(swizzle)); + } + /* Add swizzle. */ for (i = 0; i < 4; i++) { switch (swizzle[i]) { case UTIL_FORMAT_SWIZZLE_X: @@ -316,7 +307,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, } return ~0; -#if defined(ENABLE_FLOAT_TEXTURES) case UTIL_FORMAT_TYPE_FLOAT: switch (desc->channel[0].size) { case 16: @@ -340,7 +330,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_32F_32F_32F_32F | result; } } -#endif } return ~0; /* Unsupported/unknown. */ @@ -405,16 +394,12 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: -#if defined(ENABLE_FLOAT_TEXTURES) case PIPE_FORMAT_R16G16B16A16_FLOAT: -#endif return R300_COLOR_FORMAT_ARGB16161616; /* 128-bit buffers. */ -#if defined(ENABLE_FLOAT_TEXTURES) case PIPE_FORMAT_R32G32B32A32_FLOAT: return R300_COLOR_FORMAT_ARGB32323232; -#endif /* YUV buffers. 
*/ case PIPE_FORMAT_UYVY: @@ -532,7 +517,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_R10SG10SB10SA2U_NORM: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - //case PIPE_FORMAT_R16G16B16A16_FLOAT: /* not in pipe_format */ + case PIPE_FORMAT_R16G16B16A16_FLOAT: case PIPE_FORMAT_R32G32B32A32_FLOAT: return modifier | R300_C0_SEL_R | R300_C1_SEL_G | @@ -573,7 +558,7 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, if (tex->uses_pitch) { /* rectangles love this */ f->format0 |= R300_TX_PITCH_EN; - f->format2 = (tex->pitch[0] - 1) & 0x1fff; + f->format2 = (tex->hwpitch[0] - 1) & 0x1fff; } else { /* power of two textures (3D, mipmaps, and no pitch) */ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); @@ -614,7 +599,7 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, if (util_format_is_depth_or_stencil(tex->b.b.format)) { for (i = 0; i <= tex->b.b.last_level; i++) { tex->fb_state.depthpitch[i] = - tex->pitch[i] | + tex->hwpitch[i] | R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | R300_DEPTHMICROTILE(tex->microtile); } @@ -622,7 +607,7 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, } else { for (i = 0; i <= tex->b.b.last_level; i++) { tex->fb_state.colorpitch[i] = - tex->pitch[i] | + tex->hwpitch[i] | r300_translate_colorformat(tex->b.b.format) | R300_COLOR_TILE(tex->mip_macrotile[i]) | R300_COLOR_MICROTILE(tex->microtile); @@ -762,12 +747,12 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, struct r300_texture *tex) { - /* The kernels <= 2.6.34-rc3 compute the size of mipmapped 3D textures + /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures * incorrectly. This is a workaround to prevent CS from being rejected. 
*/ unsigned i, size; - if (screen->rws->get_value(screen->rws, R300_VID_TEX3D_MIP_BUG) && + if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && tex->b.b.target == PIPE_TEXTURE_3D && tex->b.b.last_level > 0) { size = 0; @@ -813,6 +798,8 @@ static void r300_setup_miptree(struct r300_screen* screen, tex->size = tex->offset[i] + size; tex->layer_size[i] = layer_size; tex->pitch[i] = stride / util_format_get_blocksize(base->format); + tex->hwpitch[i] = + tex->pitch[i] * util_format_get_blockwidth(base->format); SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 453d42b188..ba79ec068a 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -28,7 +28,7 @@ struct r300_texture; uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle); + const unsigned char *swizzle_view); uint32_t r500_tx_format_msb_bit(enum pipe_format format); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index f6428ed760..89f39af976 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -108,11 +108,9 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */ case TGSI_OPCODE_IF: return RC_OPCODE_IF; /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */ - /* case TGSI_OPCODE_REP: return RC_OPCODE_REP; */ case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */ - /* case TGSI_OPCODE_ENDREP: return RC_OPCODE_ENDREP; */ /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */ /* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */ case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL; diff --git 
a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index bfab9c3b01..b7609bad81 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -94,94 +94,6 @@ static void r300_shader_read_vs_outputs( vs_outputs->wpos = i; } -/* This function sets up: - * - VAP mapping, which maps VS registers to output semantics and - * at the same time it indicates which attributes are enabled and should - * be rasterized. - * - Stream mapping to VS outputs if TCL is not present. */ -static void r300_init_vs_output_mapping(struct r300_vertex_shader* vs) -{ - struct r300_shader_semantics* vs_outputs = &vs->outputs; - struct r300_vap_output_state *vap_out = &vs->vap_out; - int *stream_loc = vs->stream_loc_notcl; - int i, gen_count, tabi = 0; - boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || - vs_outputs->bcolor[1] != ATTR_UNUSED; - - vap_out->vap_vtx_state_cntl = 0x5555; /* XXX this is classic Mesa bonghits */ - - /* Position. */ - if (vs_outputs->pos != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - - stream_loc[tabi++] = 0; - } else { - assert(0); - } - - /* Point size. */ - if (vs_outputs->psize != ATTR_UNUSED) { - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - - stream_loc[tabi++] = 1; - } - - /* Colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || - vs_outputs->color[1] != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; - - stream_loc[tabi++] = 2 + i; - } - } - - /* Back-face colors. 
*/ - if (any_bcolor_used) { - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); - - stream_loc[tabi++] = 4 + i; - } - } - - /* Texture coordinates. */ - gen_count = 0; - for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); - - stream_loc[tabi++] = 6 + gen_count; - gen_count++; - } - } - - /* Fog coordinates. */ - if (gen_count < 8 && vs_outputs->fog != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); - - stream_loc[tabi++] = 6 + gen_count; - gen_count++; - } - - /* WPOS. */ - if (gen_count < 8) { - vs->wpos_tex_output = gen_count; - stream_loc[tabi++] = 6 + gen_count; - } else { - vs_outputs->wpos = ATTR_UNUSED; - } - - for (; tabi < 16;) { - stream_loc[tabi++] = -1; - } -} - static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) { struct r300_vertex_shader * vs = c->UserData; @@ -246,9 +158,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) } /* WPOS. */ - if (outputs->wpos != ATTR_UNUSED) { - c->code->outputs[outputs->wpos] = reg++; - } + c->code->outputs[outputs->wpos] = reg++; } static void r300_dummy_vertex_shader( @@ -286,7 +196,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, tgsi_scan_shader(tokens, &vs->info); r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - r300_init_vs_output_mapping(vs); /* Setup the compiler */ rc_init(&compiler.Base); @@ -307,16 +216,11 @@ void r300_translate_vertex_shader(struct r300_context* r300, r300_tgsi_to_rc(&ttr, tokens); - compiler.RequiredOutputs = - ~(~0 << (vs->info.num_outputs + - (vs->outputs.wpos != ATTR_UNUSED ? 
1 : 0))); - + compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs + 1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; /* Insert the WPOS output. */ - if (vs->outputs.wpos != ATTR_UNUSED) { - rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); - } + rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); @@ -343,32 +247,3 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* And, finally... */ rc_destroy(&compiler.Base); } - -boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) -{ - struct r300_vertex_shader* vs = r300->vs_state.state; - struct r300_vap_output_state *vap_out = &vs->vap_out; - int tex_output = vs->wpos_tex_output; - uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; - - if (vs->outputs.wpos == ATTR_UNUSED) { - return FALSE; - } - - if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) { - /* Enable WPOS in VAP. */ - if (!(vap_out->vap_vsm_vtx_assm & tex_fmt)) { - vap_out->vap_vsm_vtx_assm |= tex_fmt; - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * tex_output)); - return TRUE; - } - } else { - /* Disable WPOS in VAP. */ - if (vap_out->vap_vsm_vtx_assm & tex_fmt) { - vap_out->vap_vsm_vtx_assm &= ~tex_fmt; - vap_out->vap_out_vtx_fmt[1] &= ~(4 << (3 * tex_output)); - return TRUE; - } - } - return FALSE; -} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 56bcc3b70b..57b3fbca0b 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -39,7 +39,6 @@ struct r300_vertex_shader { struct tgsi_shader_info info; struct r300_shader_semantics outputs; - struct r300_vap_output_state vap_out; /* Whether the shader was replaced by a dummy one due to a shader * compilation failure. */ @@ -49,12 +48,6 @@ struct r300_vertex_shader { unsigned externals_count; unsigned immediates_count; - /* Stream locations for SWTCL or if TCL is bypassed. 
*/ - int stream_loc_notcl[16]; - - /* Output stream location for WPOS. */ - int wpos_tex_output; - /* HWTCL-specific. */ /* Machine code (if translated) */ struct r300_vertex_program_code code; @@ -67,7 +60,4 @@ void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs, const struct tgsi_token *tokens); -/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */ -boolean r300_vertex_shader_setup_wpos(struct r300_context* r300); - #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 2bd40176d1..1642981eaa 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -39,7 +39,7 @@ enum r300_value_id { R300_VID_GB_PIPES, R300_VID_Z_PIPES, R300_VID_SQUARE_TILING_SUPPORT, - R300_VID_TEX3D_MIP_BUG, + R300_VID_DRM_2_3_0, }; enum r300_reference_domain { /* bitfield */ @@ -119,6 +119,10 @@ struct r300_winsys_screen { /* Write a dword to the command buffer. */ void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); + /* Write a table of dwords to the command buffer. */ + void (*write_cs_table)(struct r300_winsys_screen* winsys, + const void *dwords, unsigned count); + /* Write a relocated dword to the command buffer. 
*/ void (*write_cs_reloc)(struct r300_winsys_screen *winsys, struct r300_winsys_buffer *buf, diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index 5f130453c3..ae3f00f338 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -69,11 +69,6 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, util_pack_color(rgba, ps->format, &uc); sp_tile_cache_clear(softpipe->cbuf_cache[i], rgba, uc.ui); - -#if !TILE_CLEAR_OPTIMIZATION - /* non-cached surface */ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui); -#endif } } @@ -83,11 +78,6 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, cv = util_pack_z_stencil(ps->format, depth, stencil); sp_tile_cache_clear(softpipe->zsbuf_cache, zero, cv); - -#if !TILE_CLEAR_OPTIMIZATION - /* non-cached surface */ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv); -#endif } softpipe->dirty_render_cache = TRUE; diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 508fe8f764..5024fc8a81 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -104,3 +104,71 @@ softpipe_flush( struct pipe_context *pipe, *fence = NULL; } + +/** + * Flush context if necessary. + * + * Returns FALSE if it would have block, but do_not_block was set, TRUE + * otherwise. + * + * TODO: move this logic to an auxiliary library? 
+ */ +boolean +softpipe_flush_resource(struct pipe_context *pipe, + struct pipe_resource *texture, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_block) +{ + unsigned referenced; + + referenced = pipe->is_resource_referenced(pipe, texture, face, level); + + if ((referenced & PIPE_REFERENCED_FOR_WRITE) || + ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { + + /* + * TODO: The semantics of these flush flags are too obtuse. They should + * disappear and the pipe driver should just ensure that all visible + * side-effects happen when they need to happen. + */ + if (referenced & PIPE_REFERENCED_FOR_WRITE) + flush_flags |= PIPE_FLUSH_RENDER_CACHE; + + if (referenced & PIPE_REFERENCED_FOR_READ) + flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + + if (cpu_access) { + /* + * Flush and wait. + */ + + struct pipe_fence_handle *fence = NULL; + + if (do_not_block) + return FALSE; + + pipe->flush(pipe, flush_flags, &fence); + + if (fence) { + /* + * This is for illustrative purposes only, as softpipe does not + * have fences. + */ + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + } else { + /* + * Just flush. 
+ */ + + pipe->flush(pipe, flush_flags, NULL); + } + } + + return TRUE; +} diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h index 68d9b5fa83..cb97482a71 100644 --- a/src/gallium/drivers/softpipe/sp_flush.h +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -28,10 +28,23 @@ #ifndef SP_FLUSH_H #define SP_FLUSH_H +#include "pipe/p_compiler.h" + struct pipe_context; struct pipe_fence_handle; -void softpipe_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence); +void +softpipe_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); + +boolean +softpipe_flush_resource(struct pipe_context *pipe, + struct pipe_resource *texture, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_block); #endif diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 7b1e058ac8..8bb0294238 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" +#include "util/u_format.h" #include "util/u_format_s3tc.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -70,6 +71,8 @@ softpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; + case PIPE_CAP_SM3: + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: @@ -144,43 +147,77 @@ static boolean softpipe_is_format_supported( struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, - unsigned tex_usage, + unsigned bind, unsigned geom_flags ) { struct sw_winsys *winsys = softpipe_screen(screen)->winsys; + const struct util_format_description *format_desc; assert(target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); - switch(format) { - case PIPE_FORMAT_YUYV: - case PIPE_FORMAT_UYVY: + 
format_desc = util_format_description(format); + if (!format_desc) return FALSE; - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return util_format_s3tc_enabled; + if (bind & (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) { + if(!winsys->is_displaytarget_format_supported(winsys, bind, format)) + return FALSE; + } - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_NONE: - return FALSE; + if (bind & PIPE_BIND_RENDER_TARGET) { + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; - default: - break; + /* + * Although possible, it is unnatural to render into compressed or YUV + * surfaces. So disable these here to avoid going into weird paths + * inside the state trackers. + */ + if (format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + /* + * TODO: Unfortunately we cannot render into anything more than 32 bits + * because we encode color clear values into a 32bit word. + */ + if (format_desc->block.bits > 32) + return FALSE; } - if(tex_usage & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - if(!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) + if (bind & PIPE_BIND_DEPTH_STENCIL) { + if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) return FALSE; + + /* + * TODO: Unfortunately we cannot render into anything more than 32 bits + * because we encode depth and stencil clear values into a 32bit word. + */ + if (format_desc->block.bits > 32) + return FALSE; + + /* + * TODO: eliminate this restriction + */ + if (format == PIPE_FORMAT_Z32_FLOAT) + return FALSE; + } + + /* + * All other operations (sampling, transfer, etc). + */ + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + return util_format_s3tc_enabled; } - /* XXX: this is often a lie. Pull in logic from llvmpipe to fix. + /* + * Everything else should be supported by u_format. 
*/ return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index c79f5fb05a..fbce9e042b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -248,7 +248,8 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, addr.bits.face, addr.bits.level, addr.bits.z, - PIPE_TRANSFER_READ, 0, 0, + PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, + 0, 0, u_minify(tc->texture->width0, addr.bits.level), u_minify(tc->texture->height0, addr.bits.level)); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 167b6b1161..7aa85559b2 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -39,6 +39,7 @@ #include "util/u_transfer.h" #include "sp_context.h" +#include "sp_flush.h" #include "sp_texture.h" #include "sp_screen.h" @@ -214,6 +215,35 @@ softpipe_resource_get_handle(struct pipe_screen *screen, /** + * Helper function to compute offset (in bytes) for a particular + * texture level/face/slice from the start of the buffer. + */ +static unsigned +sp_get_tex_image_offset(const struct softpipe_resource *spr, + unsigned level, unsigned face, unsigned zslice) +{ + const unsigned hgt = u_minify(spr->base.height0, level); + const unsigned nblocksy = util_format_get_nblocksy(spr->base.format, hgt); + unsigned offset = spr->level_offset[level]; + + if (spr->base.target == PIPE_TEXTURE_CUBE) { + assert(zslice == 0); + offset += face * nblocksy * spr->stride[level]; + } + else if (spr->base.target == PIPE_TEXTURE_3D) { + assert(face == 0); + offset += zslice * nblocksy * spr->stride[level]; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + return offset; +} + + +/** * Get a pipe_surface "view" into a texture resource. 
*/ static struct pipe_surface * @@ -234,25 +264,12 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->format = pt->format; ps->width = u_minify(pt->width0, level); ps->height = u_minify(pt->height0, level); - ps->offset = spr->level_offset[level]; + ps->offset = sp_get_tex_image_offset(spr, level, face, zslice); ps->usage = usage; ps->face = face; ps->level = level; ps->zslice = zslice; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset += face * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) * - spr->stride[level]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - ps->offset += zslice * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) * - spr->stride[level]; - } - else { - assert(face == 0); - assert(zslice == 0); - } } return ps; } @@ -290,8 +307,8 @@ softpipe_get_transfer(struct pipe_context *pipe, unsigned usage, const struct pipe_box *box) { - struct softpipe_resource *sprex = softpipe_resource(resource); - struct softpipe_transfer *spr; + struct softpipe_resource *spr = softpipe_resource(resource); + struct softpipe_transfer *spt; assert(resource); assert(sr.level <= resource->last_level); @@ -301,33 +318,41 @@ softpipe_get_transfer(struct pipe_context *pipe, assert(box->y + box->height <= u_minify(resource->height0, sr.level)); assert(box->z + box->depth <= u_minify(resource->depth0, sr.level)); - spr = CALLOC_STRUCT(softpipe_transfer); - if (spr) { - struct pipe_transfer *pt = &spr->base; + /* + * Transfers, like other pipe operations, must happen in order, so flush the + * context if necessary. + */ + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + boolean read_only = !(usage & PIPE_TRANSFER_WRITE); + boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK); + if (!softpipe_flush_resource(pipe, resource, + sr.face, sr.level, + 0, /* flush_flags */ + read_only, + TRUE, /* cpu_access */ + do_not_block)) { + /* + * It would have blocked, but state tracker requested not to.
+ */ + assert(do_not_block); + return NULL; + } + } + + spt = CALLOC_STRUCT(softpipe_transfer); + if (spt) { + struct pipe_transfer *pt = &spt->base; enum pipe_format format = resource->format; - int nblocksy = util_format_get_nblocksy(resource->format, - u_minify(resource->height0, sr.level)); pipe_resource_reference(&pt->resource, resource); pt->sr = sr; pt->usage = usage; pt->box = *box; - pt->stride = sprex->stride[sr.level]; + pt->stride = spr->stride[sr.level]; - spr->offset = sprex->level_offset[sr.level]; - - if (resource->target == PIPE_TEXTURE_CUBE) { - spr->offset += sr.face * nblocksy * pt->stride; - } - else if (resource->target == PIPE_TEXTURE_3D) { - spr->offset += box->z * nblocksy * pt->stride; - } - else { - assert(sr.face == 0); - assert(box->z == 0); - } - - spr->offset += - box->y / util_format_get_blockheight(format) * spr->base.stride + + spt->offset = sp_get_tex_image_offset(spr, sr.level, sr.face, box->z); + + spt->offset += + box->y / util_format_get_blockheight(format) * spt->base.stride + box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); return pt; @@ -356,26 +381,24 @@ static void * softpipe_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct softpipe_transfer *sp_transfer = softpipe_transfer(transfer); - struct softpipe_resource *sp_resource = softpipe_resource(transfer->resource); + struct softpipe_transfer *spt = softpipe_transfer(transfer); + struct softpipe_resource *spr = softpipe_resource(transfer->resource); struct sw_winsys *winsys = softpipe_screen(pipe->screen)->winsys; uint8_t *map; /* resources backed by display target treated specially: */ - if (sp_resource->dt) { - map = winsys->displaytarget_map(winsys, - sp_resource->dt, - transfer->usage); + if (spr->dt) { + map = winsys->displaytarget_map(winsys, spr->dt, transfer->usage); } else { - map = sp_resource->data; + map = spr->data; } if (map == NULL) return NULL; else - return map + sp_transfer->offset; + return 
map + spt->offset; } @@ -412,26 +435,25 @@ softpipe_user_buffer_create(struct pipe_screen *screen, unsigned bytes, unsigned bind_flags) { - struct softpipe_resource *buffer; + struct softpipe_resource *spr; - buffer = CALLOC_STRUCT(softpipe_resource); - if(!buffer) + spr = CALLOC_STRUCT(softpipe_resource); + if (!spr) return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = screen; - buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */ - buffer->base.bind = bind_flags; - buffer->base.usage = PIPE_USAGE_IMMUTABLE; - buffer->base.flags = 0; - buffer->base.width0 = bytes; - buffer->base.height0 = 1; - buffer->base.depth0 = 1; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; + pipe_reference_init(&spr->base.reference, 1); + spr->base.screen = screen; + spr->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */ + spr->base.bind = bind_flags; + spr->base.usage = PIPE_USAGE_IMMUTABLE; + spr->base.flags = 0; + spr->base.width0 = bytes; + spr->base.height0 = 1; + spr->base.depth0 = 1; + spr->userBuffer = TRUE; + spr->data = ptr; + + return &spr->base; } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index d996c2a342..f4db6f6ef0 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -103,7 +103,7 @@ sp_create_tile_cache( struct pipe_context *pipe ) * However, it breaks clearing in other situations (such as in * progs/tests/drawbuffers, see bug 24402). 
*/ -#if 0 && TILE_CLEAR_OPTIMIZATION +#if 0 /* set flags to indicate all the tiles are cleared */ memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); #endif @@ -155,7 +155,8 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, if (ps) { tc->transfer = pipe_get_transfer(pipe, ps->texture, ps->face, ps->level, ps->zslice, - PIPE_TRANSFER_READ_WRITE, + PIPE_TRANSFER_READ_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED, 0, 0, ps->width, ps->height); tc->depth_stencil = (ps->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || @@ -344,9 +345,7 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) } } -#if TILE_CLEAR_OPTIMIZATION sp_tile_cache_flush_clear(tc); -#endif } #if 0 @@ -448,13 +447,8 @@ sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba, tc->clear_val = clearValue; -#if TILE_CLEAR_OPTIMIZATION /* set flags to indicate all the tiles are cleared */ memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); -#else - /* disable the optimization */ - memset(tc->clear_flags, 0, sizeof(tc->clear_flags)); -#endif for (pos = 0; pos < NUM_ENTRIES; pos++) { struct softpipe_cached_tile *tile = tc->entries + pos; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 753d8c0daa..e03d53eb24 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -28,8 +28,6 @@ #ifndef SP_TILE_CACHE_H #define SP_TILE_CACHE_H -#define TILE_CLEAR_OPTIMIZATION 1 - #include "pipe/p_compiler.h" diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index 005996d05d..da33fae62f 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -277,12 +277,13 @@ svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl, ret = svga_hwtnl_simple_draw_range_elements( hwtnl, gen_buf, gen_size, + start, 0, count - 1, gen_prim, 0, - gen_nr, - start ); + gen_nr ); + if (ret) goto done; diff --git 
a/src/gallium/drivers/svga/svga_resource.c b/src/gallium/drivers/svga/svga_resource.c index 15258c1966..ba630582e5 100644 --- a/src/gallium/drivers/svga/svga_resource.c +++ b/src/gallium/drivers/svga/svga_resource.c @@ -14,7 +14,7 @@ svga_resource_create(struct pipe_screen *screen, if (template->target == PIPE_BUFFER) return svga_buffer_create(screen, template); else - return svga_resource_create(screen, template); + return svga_texture_create(screen, template); } @@ -26,7 +26,7 @@ svga_resource_from_handle(struct pipe_screen * screen, if (template->target == PIPE_BUFFER) return NULL; else - return svga_resource_from_handle(screen, template, whandle); + return svga_texture_from_handle(screen, template, whandle); } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index aeda3dcad5..9fc613da74 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -397,6 +397,7 @@ svga_screen_create(struct svga_winsys_screen *sws) screen->fence_finish = svga_fence_finish; svgascreen->sws = sws; + svga_screen_init_surface_functions(svgascreen); svga_init_screen_resource_functions(svgascreen); svgascreen->use_ps30 = diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index dfaab53aef..d34d68f535 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -43,7 +43,7 @@ svga_translate_vertex_format(enum pipe_format format) case PIPE_FORMAT_R32G32_FLOAT: return SVGA3D_DECLTYPE_FLOAT2; case PIPE_FORMAT_R32G32B32_FLOAT: return SVGA3D_DECLTYPE_FLOAT3; case PIPE_FORMAT_R32G32B32A32_FLOAT: return SVGA3D_DECLTYPE_FLOAT4; - case PIPE_FORMAT_A8R8G8B8_UNORM: return SVGA3D_DECLTYPE_D3DCOLOR; + case PIPE_FORMAT_B8G8R8A8_UNORM: return SVGA3D_DECLTYPE_D3DCOLOR; case PIPE_FORMAT_R8G8B8A8_USCALED: return SVGA3D_DECLTYPE_UBYTE4; case PIPE_FORMAT_R16G16_SSCALED: return SVGA3D_DECLTYPE_SHORT2; case 
PIPE_FORMAT_R16G16B16A16_SSCALED: return SVGA3D_DECLTYPE_SHORT4; @@ -52,14 +52,10 @@ svga_translate_vertex_format(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_SNORM: return SVGA3D_DECLTYPE_SHORT4N; case PIPE_FORMAT_R16G16_UNORM: return SVGA3D_DECLTYPE_USHORT2N; case PIPE_FORMAT_R16G16B16A16_UNORM: return SVGA3D_DECLTYPE_USHORT4N; - - /* These formats don't exist yet: - * - case PIPE_FORMAT_R10G10B10_USCALED: return SVGA3D_DECLTYPE_UDEC3; - case PIPE_FORMAT_R10G10B10_SNORM: return SVGA3D_DECLTYPE_DEC3N; + case PIPE_FORMAT_R10G10B10X2_USCALED: return SVGA3D_DECLTYPE_UDEC3; + case PIPE_FORMAT_R10G10B10X2_SNORM: return SVGA3D_DECLTYPE_DEC3N; case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2; case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4; - */ default: /* There are many formats without hardware support. This case diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index a6215c68cb..5133c70593 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -229,7 +229,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, - vbuffer->stride); + vbuffer->stride, vbuffer->max_index); translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 3d4f56a67b..7d7024c4a7 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -49,9 +49,7 @@ translate_opcode( case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; - case TGSI_OPCODE_ENDFOR: return SVGA3DOP_ENDLOOP; case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; - case TGSI_OPCODE_BGNFOR: return SVGA3DOP_LOOP; case TGSI_OPCODE_MAD: return SVGA3DOP_MAD; case TGSI_OPCODE_MAX: return 
SVGA3DOP_MAX; case TGSI_OPCODE_MIN: return SVGA3DOP_MIN; @@ -2686,7 +2684,6 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || - emit->info.opcode_count[TGSI_OPCODE_BGNFOR] >= 1 || emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 8216c06260..71ba1e909d 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1142,12 +1142,12 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, trace_dump_arg_end(); if (num_buffers) { - struct pipe_vertex_buffer *_buffers = malloc(num_buffers * sizeof(*_buffers)); + struct pipe_vertex_buffer *_buffers = MALLOC(num_buffers * sizeof(*_buffers)); memcpy(_buffers, buffers, num_buffers * sizeof(*_buffers)); for (i = 0; i < num_buffers; i++) _buffers[i].buffer = trace_resource_unwrap(tr_ctx, buffers[i].buffer); pipe->set_vertex_buffers(pipe, num_buffers, _buffers); - free(_buffers); + FREE(_buffers); } else { pipe->set_vertex_buffers(pipe, num_buffers, NULL); } diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c index eaa47df406..0dc8cca264 100644 --- a/src/gallium/drivers/trace/tr_drm.c +++ b/src/gallium/drivers/trace/tr_drm.c @@ -73,7 +73,7 @@ trace_drm_destroy(struct drm_api *_api) if (api->destroy) api->destroy(api); - free(tr_api); + FREE(tr_api); } struct drm_api * diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 0a4bd584ae..1aa54f1423 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -269,7 +269,7 @@ enum pipe_transfer_usage { * - pipe_context::transfer_flush_region * - OpenGL's ARB_map_buffer_range extension, MAP_FLUSH_EXPLICIT_BIT flag. 
*/ - PIPE_TRANSFER_FLUSH_EXPLICIT = (1 << 11), + PIPE_TRANSFER_FLUSH_EXPLICIT = (1 << 11) }; @@ -291,10 +291,10 @@ enum pipe_transfer_usage { #define PIPE_BIND_TRANSFER_READ (1 << 10) /* get_transfer */ #define PIPE_BIND_CUSTOM (1 << 16) /* state-tracker/winsys usages */ -/* The first two flags were previously part of the amorphous +/* The first two flags above were previously part of the amorphous * TEXTURE_USAGE, most of which are now descriptions of the ways a - * particular texture can be bound to the gallium pipeline. These two - * do not fit within that and probably need to be migrated to some + * particular texture can be bound to the gallium pipeline. The two flags + * below do not fit within that and probably need to be migrated to some * other place. * * It seems like scanout is used by the Xorg state tracker to ask for @@ -304,7 +304,7 @@ enum pipe_transfer_usage { * * The shared flag is quite underspecified, but certainly isn't a * binding flag - it seems more like a message to the winsys to create - * a shareable allocation. Could it mean that this texture is a valid argument for + * a shareable allocation. */ #define PIPE_BIND_SCANOUT (1 << 14) /* */ #define PIPE_BIND_SHARED (1 << 15) /* get_texture_handle ??? */ diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 06ab4a848a..beff1ae8a9 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -93,13 +93,13 @@ struct pipe_screen { /** * Check if the given pipe_format is supported as a texture or * drawing surface. 
- * \param tex_usage bitmask of PIPE_BIND_* + * \param bindings bitmask of PIPE_BIND_* * \param geom_flags bitmask of PIPE_TEXTURE_GEOM_* */ boolean (*is_format_supported)( struct pipe_screen *, enum pipe_format format, enum pipe_texture_target target, - unsigned tex_usage, + unsigned bindings, unsigned geom_flags ); /** diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index c5c480f1f0..e21aaacc18 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -276,12 +276,10 @@ struct tgsi_property_data { #define TGSI_OPCODE_TXL 72 #define TGSI_OPCODE_BRK 73 #define TGSI_OPCODE_IF 74 -#define TGSI_OPCODE_BGNFOR 75 -#define TGSI_OPCODE_REP 76 + /* gap */ #define TGSI_OPCODE_ELSE 77 #define TGSI_OPCODE_ENDIF 78 -#define TGSI_OPCODE_ENDFOR 79 -#define TGSI_OPCODE_ENDREP 80 + /* gap */ #define TGSI_OPCODE_PUSHA 81 #define TGSI_OPCODE_POPA 82 #define TGSI_OPCODE_CEIL 83 diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h index 8897ff7c25..002d1c6b84 100644 --- a/src/gallium/include/state_tracker/st_api.h +++ b/src/gallium/include/state_tracker/st_api.h @@ -43,14 +43,6 @@ */ /** - * The entry points of the state trackers. - */ -#define ST_MODULE_OPENGL_SYMBOL "st_module_OpenGL" -#define ST_MODULE_OPENGL_ES1_SYMBOL "st_module_OpenGL_ES1" -#define ST_MODULE_OPENGL_ES2_SYMBOL "st_module_OpenGL_ES2" -#define ST_MODULE_OPENVG_SYMBOL "st_module_OpenVG" - -/** * The supported rendering API of a state tracker. */ enum st_api_type { @@ -379,17 +371,6 @@ struct st_api }; /** - * Represent a state tracker. - * - * This is the entry point of a state tracker. - */ -struct st_module -{ - enum st_api_type api; - struct st_api *(*create_api)(void); -}; - -/** * Return true if the visual has the specified buffers. 
*/ static INLINE boolean @@ -399,9 +380,17 @@ st_visual_have_buffers(const struct st_visual *visual, unsigned mask) } /* these symbols may need to be dynamically lookup up */ -extern PUBLIC const struct st_module st_module_OpenGL; -extern PUBLIC const struct st_module st_module_OpenGL_ES1; -extern PUBLIC const struct st_module st_module_OpenGL_ES2; -extern PUBLIC const struct st_module st_module_OpenVG; +extern PUBLIC struct st_api * st_api_create_OpenGL(void); +extern PUBLIC struct st_api * st_api_create_OpenGL_ES1(void); +extern PUBLIC struct st_api * st_api_create_OpenGL_ES2(void); +extern PUBLIC struct st_api * st_api_create_OpenVG(void); + +/** + * The entry points of the state trackers. + */ +#define ST_CREATE_OPENGL_SYMBOL "st_api_create_OpenGL" +#define ST_CREATE_OPENGL_ES1_SYMBOL "st_api_create_OpenGL_ES1" +#define ST_CREATE_OPENGL_ES2_SYMBOL "st_api_create_OpenGL_ES2" +#define ST_CREATE_OPENVG_SYMBOL "st_api_create_OpenVG" #endif /* _ST_API_H_ */ diff --git a/src/gallium/state_trackers/dri/common/dri1_helper.c b/src/gallium/state_trackers/dri/common/dri1_helper.c index b0dd974a96..f641b41ff8 100644 --- a/src/gallium/state_trackers/dri/common/dri1_helper.c +++ b/src/gallium/state_trackers/dri/common/dri1_helper.c @@ -42,7 +42,7 @@ struct pipe_fence_handle * dri1_swap_fences_pop_front(struct dri_drawable *draw) { - struct pipe_screen *screen = dri_screen(draw->sPriv)->pipe_screen; + struct pipe_screen *screen = dri_screen(draw->sPriv)->base.screen; struct pipe_fence_handle *fence = NULL; if (draw->cur_fences >= draw->desired_fences) { @@ -58,7 +58,7 @@ void dri1_swap_fences_push_back(struct dri_drawable *draw, struct pipe_fence_handle *fence) { - struct pipe_screen *screen = dri_screen(draw->sPriv)->pipe_screen; + struct pipe_screen *screen = dri_screen(draw->sPriv)->base.screen; if (!fence) return; @@ -74,7 +74,7 @@ dri1_swap_fences_push_back(struct dri_drawable *draw, void dri1_swap_fences_clear(struct dri_drawable *drawable) { - struct pipe_screen 
*screen = dri_screen(drawable->sPriv)->pipe_screen; + struct pipe_screen *screen = dri_screen(drawable->sPriv)->base.screen; struct pipe_fence_handle *fence; while (drawable->cur_fences) { @@ -86,7 +86,7 @@ dri1_swap_fences_clear(struct dri_drawable *drawable) struct pipe_surface * dri1_get_pipe_surface(struct dri_drawable *drawable, struct pipe_resource *ptex) { - struct pipe_screen *pipe_screen = dri_screen(drawable->sPriv)->pipe_screen; + struct pipe_screen *pipe_screen = dri_screen(drawable->sPriv)->base.screen; struct pipe_surface *psurf = drawable->dri1_surface; if (!psurf || psurf->texture != ptex) { @@ -114,7 +114,7 @@ dri1_get_pipe_context(struct dri_screen *screen) if (!pipe) { screen->dri1_pipe = - screen->pipe_screen->context_create(screen->pipe_screen, NULL); + screen->base.screen->context_create(screen->base.screen, NULL); pipe = screen->dri1_pipe; } diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index f14f4130bf..a808d2d9dd 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -34,7 +34,6 @@ #include "dri_screen.h" #include "dri_drawable.h" #include "dri_context.h" -#include "dri_st_api.h" #include "pipe/p_context.h" #include "state_tracker/st_context.h" @@ -53,9 +52,9 @@ GLboolean dri_create_context(const __GLcontextModes * visual, __DRIcontext * cPriv, void *sharedContextPrivate) { - struct st_api *stapi = dri_get_st_api(); __DRIscreen *sPriv = cPriv->driScreenPriv; struct dri_screen *screen = dri_screen(sPriv); + struct st_api *stapi = screen->st_api; struct dri_context *ctx = NULL; struct st_context_iface *st_share = NULL; struct st_visual stvis; @@ -77,7 +76,7 @@ dri_create_context(const __GLcontextModes * visual, &screen->optionCache, sPriv->myNum, "dri"); dri_fill_st_visual(&stvis, screen, visual); - ctx->st = stapi->create_context(stapi, screen->smapi, &stvis, st_share); + ctx->st = 
stapi->create_context(stapi, &screen->base, &stvis, st_share); if (ctx->st == NULL) goto fail; ctx->st->st_manager_private = (void *) ctx; @@ -119,16 +118,15 @@ dri_destroy_context(__DRIcontext * cPriv) GLboolean dri_unbind_context(__DRIcontext * cPriv) { - struct st_api *stapi = dri_get_st_api(); - - if (cPriv) { - struct dri_context *ctx = dri_context(cPriv); + /* dri_util.c ensures cPriv is not null */ + struct dri_screen *screen = dri_screen(cPriv->driScreenPriv); + struct dri_context *ctx = dri_context(cPriv); + struct st_api *stapi = screen->st_api; - if (--ctx->bind_count == 0) { - if (ctx->st == stapi->get_current(stapi)) { - ctx->st->flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); - stapi->make_current(stapi, NULL, NULL, NULL); - } + if (--ctx->bind_count == 0) { + if (ctx->st == stapi->get_current(stapi)) { + ctx->st->flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); + stapi->make_current(stapi, NULL, NULL, NULL); } } @@ -140,42 +138,38 @@ dri_make_current(__DRIcontext * cPriv, __DRIdrawable * driDrawPriv, __DRIdrawable * driReadPriv) { - struct st_api *stapi = dri_get_st_api(); - - if (cPriv) { - struct dri_context *ctx = dri_context(cPriv); - struct dri_drawable *draw = dri_drawable(driDrawPriv); - struct dri_drawable *read = dri_drawable(driReadPriv); - struct st_context_iface *old_st; + /* dri_util.c ensures cPriv is not null */ + struct dri_screen *screen = dri_screen(cPriv->driScreenPriv); + struct dri_context *ctx = dri_context(cPriv); + struct st_api *stapi = screen->st_api; + struct dri_drawable *draw = dri_drawable(driDrawPriv); + struct dri_drawable *read = dri_drawable(driReadPriv); + struct st_context_iface *old_st = stapi->get_current(stapi); - old_st = stapi->get_current(stapi); - if (old_st && old_st != ctx->st) - ctx->st->flush(old_st, PIPE_FLUSH_RENDER_CACHE, NULL); + if (old_st && old_st != ctx->st) + old_st->flush(old_st, PIPE_FLUSH_RENDER_CACHE, NULL); - ++ctx->bind_count; + ++ctx->bind_count; - if (ctx->dPriv != driDrawPriv) { - 
ctx->dPriv = driDrawPriv; - draw->texture_stamp = driDrawPriv->lastStamp - 1; - } - if (ctx->rPriv != driReadPriv) { - ctx->rPriv = driReadPriv; - read->texture_stamp = driReadPriv->lastStamp - 1; - } - - stapi->make_current(stapi, ctx->st, draw->stfb, read->stfb); + if (ctx->dPriv != driDrawPriv) { + ctx->dPriv = driDrawPriv; + draw->texture_stamp = driDrawPriv->lastStamp - 1; } - else { - stapi->make_current(stapi, NULL, NULL, NULL); + if (ctx->rPriv != driReadPriv) { + ctx->rPriv = driReadPriv; + read->texture_stamp = driReadPriv->lastStamp - 1; } + stapi->make_current(stapi, ctx->st, &draw->base, &read->base); + return GL_TRUE; } struct dri_context * -dri_get_current(void) +dri_get_current(__DRIscreen *sPriv) { - struct st_api *stapi = dri_get_st_api(); + struct dri_screen *screen = dri_screen(sPriv); + struct st_api *stapi = screen->st_api; struct st_context_iface *st; st = stapi->get_current(stapi); diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h index 594618874a..9fe6b58101 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.h +++ b/src/gallium/state_trackers/dri/common/dri_context.h @@ -80,7 +80,7 @@ dri_make_current(__DRIcontext * driContextPriv, __DRIdrawable * driReadPriv); struct dri_context * -dri_get_current(void); +dri_get_current(__DRIscreen * driScreenPriv); boolean dri_create_context(const __GLcontextModes * visual, diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c index 88c17e81bf..25892fc7a7 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c @@ -32,13 +32,82 @@ #include "dri_screen.h" #include "dri_context.h" #include "dri_drawable.h" -#include "dri_st_api.h" #include "dri1_helper.h" #include "pipe/p_screen.h" #include "util/u_format.h" #include "util/u_memory.h" +#include "util/u_inlines.h" + +static boolean 
+dri_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, + const enum st_attachment_type *statts, + unsigned count, + struct pipe_resource **out) +{ + struct dri_drawable *drawable = + (struct dri_drawable *) stfbi->st_manager_private; + struct dri_screen *screen = dri_screen(drawable->sPriv); + unsigned statt_mask, new_mask; + boolean new_stamp; + int i; + + statt_mask = 0x0; + for (i = 0; i < count; i++) + statt_mask |= (1 << statts[i]); + + /* record newly allocated textures */ + new_mask = (statt_mask & ~drawable->texture_mask); + + /* + * dPriv->pStamp is the server stamp. It should be accessed with a lock, at + * least for DRI1. dPriv->lastStamp is the client stamp. It has the value + * of the server stamp when last checked. + */ + new_stamp = (drawable->texture_stamp != drawable->dPriv->lastStamp); + + if (new_stamp || new_mask) { + if (new_stamp && screen->update_drawable_info) + screen->update_drawable_info(drawable); + + screen->allocate_textures(drawable, statts, count); + + /* add existing textures */ + for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { + if (drawable->textures[i]) + statt_mask |= (1 << i); + } + + drawable->texture_stamp = drawable->dPriv->lastStamp; + drawable->texture_mask = statt_mask; + } + + if (!out) + return TRUE; + + for (i = 0; i < count; i++) { + out[i] = NULL; + pipe_resource_reference(&out[i], drawable->textures[statts[i]]); + } + + return TRUE; +} + +static boolean +dri_st_framebuffer_flush_front(struct st_framebuffer_iface *stfbi, + enum st_attachment_type statt) +{ + struct dri_drawable *drawable = + (struct dri_drawable *) stfbi->st_manager_private; + struct dri_screen *screen = dri_screen(drawable->sPriv); + + /* XXX remove this and just set the correct one on the framebuffer */ + screen->flush_frontbuffer(drawable, statt); + + return TRUE; +} + /** * This is called when we need to set up GL rendering to a new X window. 
*/ @@ -58,9 +127,12 @@ dri_create_buffer(__DRIscreen * sPriv, goto fail; dri_fill_st_visual(&drawable->stvis, screen, visual); - drawable->stfb = dri_create_st_framebuffer(drawable); - if (drawable->stfb == NULL) - goto fail; + + /* setup the st_framebuffer_iface */ + drawable->base.visual = &drawable->stvis; + drawable->base.flush_front = dri_st_framebuffer_flush_front; + drawable->base.validate = dri_st_framebuffer_validate; + drawable->base.st_manager_private = (void *) drawable; drawable->sPriv = sPriv; drawable->dPriv = dPriv; @@ -78,16 +150,75 @@ void dri_destroy_buffer(__DRIdrawable * dPriv) { struct dri_drawable *drawable = dri_drawable(dPriv); + int i; dri1_swap_fences_clear(drawable); dri1_destroy_pipe_surface(drawable); - dri_destroy_st_framebuffer(drawable->stfb); + for (i = 0; i < ST_ATTACHMENT_COUNT; i++) + pipe_resource_reference(&drawable->textures[i], NULL); drawable->desired_fences = 0; FREE(drawable); } +/** + * Validate the texture at an attachment. Allocate the texture if it does not + * exist. + */ +void +dri_drawable_validate_att(struct dri_drawable *drawable, + enum st_attachment_type statt) +{ + enum st_attachment_type statts[ST_ATTACHMENT_COUNT]; + unsigned i, count = 0; + + /* check if buffer already exists */ + if (drawable->texture_mask & (1 << statt)) + return; + + /* make sure DRI2 does not destroy existing buffers */ + for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { + if (drawable->texture_mask & (1 << i)) { + statts[count++] = i; + } + } + statts[count++] = statt; + + drawable->texture_stamp = drawable->dPriv->lastStamp - 1; + + /* this calls into the manager */ + drawable->base.validate(&drawable->base, statts, count, NULL); +} + +/** + * Get the format and binding of an attachment. 
+ */ +void +dri_drawable_get_format(struct dri_drawable *drawable, + enum st_attachment_type statt, + enum pipe_format *format, + unsigned *bind) +{ + switch (statt) { + case ST_ATTACHMENT_FRONT_LEFT: + case ST_ATTACHMENT_BACK_LEFT: + case ST_ATTACHMENT_FRONT_RIGHT: + case ST_ATTACHMENT_BACK_RIGHT: + *format = drawable->stvis.color_format; + *bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + break; + case ST_ATTACHMENT_DEPTH_STENCIL: + *format = drawable->stvis.depth_stencil_format; + *bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */ + break; + default: + *format = PIPE_FORMAT_NONE; + *bind = 0; + break; + } +} + /* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h index 315b778165..5fd650ac88 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.h +++ b/src/gallium/state_trackers/dri/common/dri_drawable.h @@ -42,14 +42,13 @@ struct dri_context; struct dri_drawable { + struct st_framebuffer_iface base; + struct st_visual stvis; + /* dri */ __DRIdrawable *dPriv; __DRIscreen *sPriv; - /* gallium */ - struct st_framebuffer_iface *stfb; - struct st_visual stvis; - __DRIbuffer old[8]; unsigned old_num; unsigned old_w; @@ -84,6 +83,16 @@ dri_create_buffer(__DRIscreen * sPriv, void dri_destroy_buffer(__DRIdrawable * dPriv); +void +dri_drawable_get_format(struct dri_drawable *drawable, + enum st_attachment_type statt, + enum pipe_format *format, + unsigned *bind); + +void +dri_drawable_validate_att(struct dri_drawable *drawable, + enum st_attachment_type statt); + #endif /* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index 4bfbc6e80b..064c73f54c 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -38,7 +38,6 @@ #include "dri_screen.h" #include "dri_context.h" 
#include "dri_drawable.h" -#include "dri_st_api.h" #include "dri1_helper.h" #ifndef __NOT_HAVE_DRM_H #include "dri1.h" @@ -50,6 +49,7 @@ #include "util/u_inlines.h" #include "pipe/p_screen.h" #include "pipe/p_format.h" +#include "state_tracker/st_gl_api.h" /* for st_gl_api_create */ #include "util/u_debug.h" @@ -79,7 +79,7 @@ dri_fill_in_modes(struct dri_screen *screen, unsigned depth_buffer_factor; unsigned back_buffer_factor; unsigned msaa_samples_factor; - struct pipe_screen *p_screen = screen->pipe_screen; + struct pipe_screen *p_screen = screen->base.screen; boolean pf_r5g6b5, pf_a8r8g8b8, pf_x8r8g8b8; boolean pf_z16, pf_x8z24, pf_z24x8, pf_s8z24, pf_z24s8, pf_z32; @@ -283,6 +283,31 @@ dri_get_swap_info(__DRIdrawable * dPriv, __DRIswapInfo * sInfo) #endif +static boolean +dri_get_egl_image(struct st_manager *smapi, + struct st_egl_image *stimg) +{ + struct dri_context *ctx = + (struct dri_context *)stimg->stctxi->st_manager_private; + struct dri_screen *screen = dri_screen(ctx->sPriv); + __DRIimage *img = NULL; + + if (screen->lookup_egl_image) { + img = screen->lookup_egl_image(ctx, stimg->egl_image); + } + + if (!img) + return FALSE; + + stimg->texture = NULL; + pipe_resource_reference(&stimg->texture, img->texture); + stimg->face = img->face; + stimg->level = img->level; + stimg->zslice = img->zslice; + + return TRUE; +} + static void dri_destroy_option_cache(struct dri_screen * screen) { @@ -304,11 +329,11 @@ dri_destroy_screen_helper(struct dri_screen * screen) { dri1_destroy_pipe_context(screen); - if (screen->smapi) - dri_destroy_st_manager(screen->smapi); + if (screen->st_api && screen->st_api->destroy) + screen->st_api->destroy(screen->st_api); - if (screen->pipe_screen) - screen->pipe_screen->destroy(screen->pipe_screen); + if (screen->base.screen) + screen->base.screen->destroy(screen->base.screen); dri_destroy_option_cache(screen); } @@ -330,14 +355,16 @@ dri_init_screen_helper(struct dri_screen *screen, struct pipe_screen *pscreen, unsigned 
pixel_bits) { - screen->pipe_screen = pscreen; - if (!screen->pipe_screen) { + screen->base.screen = pscreen; + if (!screen->base.screen) { debug_printf("%s: failed to create pipe_screen\n", __FUNCTION__); return NULL; } - screen->smapi = dri_create_st_manager(screen); - if (!screen->smapi) + screen->base.get_egl_image = dri_get_egl_image; + screen->st_api = st_gl_api_create(); + + if (!screen->st_api) return NULL; driParseOptionInfo(&screen->optionCache, diff --git a/src/gallium/state_trackers/dri/common/dri_screen.h b/src/gallium/state_trackers/dri/common/dri_screen.h index 8ab7d43919..1740fa8f42 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.h +++ b/src/gallium/state_trackers/dri/common/dri_screen.h @@ -41,8 +41,15 @@ #include "state_tracker/st_api.h" #include "state_tracker/drm_api.h" +struct dri_context; +struct dri_drawable; + struct dri_screen { + /* st_api */ + struct st_manager base; + struct st_api *st_api; + /* dri */ __DRIscreen *sPriv; @@ -55,16 +62,21 @@ struct dri_screen int fd; drmLock *drmLock; + /* hooks filled in by dri1, dri2 & drisw */ + __DRIimage * (*lookup_egl_image)(struct dri_context *ctx, void *handle); + void (*allocate_textures)(struct dri_drawable *drawable, + const enum st_attachment_type *statts, + unsigned count); + void (*update_drawable_info)(struct dri_drawable *drawable); + void (*flush_frontbuffer)(struct dri_drawable *drawable, + enum st_attachment_type statt); + /* gallium */ struct drm_api *api; - struct pipe_winsys *pipe_winsys; - struct pipe_screen *pipe_screen; boolean d_depth_bits_last; boolean sd_depth_bits_last; boolean auto_fake_front; - struct st_manager *smapi; - /* used only by DRI1 */ struct pipe_context *dri1_pipe; }; @@ -76,6 +88,15 @@ dri_screen(__DRIscreen * sPriv) return (struct dri_screen *)sPriv->private; } +struct __DRIimageRec { + struct pipe_resource *texture; + unsigned face; + unsigned level; + unsigned zslice; + + void *loader_private; +}; + #ifndef __NOT_HAVE_DRM_H static INLINE 
boolean diff --git a/src/gallium/state_trackers/dri/common/dri_st_api.c b/src/gallium/state_trackers/dri/common/dri_st_api.c deleted file mode 100644 index 261bae75a2..0000000000 --- a/src/gallium/state_trackers/dri/common/dri_st_api.c +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Mesa 3-D graphics library - * Version: 7.9 - * - * Copyright (C) 2010 LunarG Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chia-I Wu <olv@lunarg.com> - */ - -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_debug.h" -#include "state_tracker/st_manager.h" /* for st_manager_create_api */ - -#include "dri_screen.h" -#include "dri_context.h" -#include "dri_drawable.h" -#include "dri_st_api.h" -#ifndef __NOT_HAVE_DRM_H -#include "dri1.h" -#include "dri2.h" -#else -#include "drisw.h" -#endif - -static boolean -dri_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, - const enum st_attachment_type *statts, - unsigned count, - struct pipe_resource **out) -{ - struct dri_drawable *drawable = - (struct dri_drawable *) stfbi->st_manager_private; - unsigned statt_mask, new_mask; - boolean new_stamp; - int i; - - statt_mask = 0x0; - for (i = 0; i < count; i++) - statt_mask |= (1 << statts[i]); - - /* record newly allocated textures */ - new_mask = (statt_mask & ~drawable->texture_mask); - - /* - * dPriv->pStamp is the server stamp. It should be accessed with a lock, at - * least for DRI1. dPriv->lastStamp is the client stamp. It has the value - * of the server stamp when last checked. 
- */ - new_stamp = (drawable->texture_stamp != drawable->dPriv->lastStamp); - - if (new_stamp || new_mask) { - -#ifndef __NOT_HAVE_DRM_H - if (__dri1_api_hooks) { - dri1_allocate_textures(drawable, statt_mask); - } - else { - dri2_allocate_textures(drawable, statts, count); - } -#else - if (new_stamp) - drisw_update_drawable_info(drawable); - - drisw_allocate_textures(drawable, statt_mask); -#endif - - /* add existing textures */ - for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { - if (drawable->textures[i]) - statt_mask |= (1 << i); - } - - drawable->texture_stamp = drawable->dPriv->lastStamp; - drawable->texture_mask = statt_mask; - } - - if (!out) - return TRUE; - - for (i = 0; i < count; i++) { - out[i] = NULL; - pipe_resource_reference(&out[i], drawable->textures[statts[i]]); - } - - return TRUE; -} - -static boolean -dri_st_framebuffer_flush_front(struct st_framebuffer_iface *stfbi, - enum st_attachment_type statt) -{ - struct dri_drawable *drawable = - (struct dri_drawable *) stfbi->st_manager_private; - -#ifndef __NOT_HAVE_DRM_H - if (__dri1_api_hooks) { - dri1_flush_frontbuffer(drawable, statt); - } - else { - dri2_flush_frontbuffer(drawable, statt); - } -#else - drisw_flush_frontbuffer(drawable, statt); -#endif - - return TRUE; -} - -/** - * Create a framebuffer from the given drawable. - */ -struct st_framebuffer_iface * -dri_create_st_framebuffer(struct dri_drawable *drawable) -{ - struct st_framebuffer_iface *stfbi; - - stfbi = CALLOC_STRUCT(st_framebuffer_iface); - if (stfbi) { - stfbi->visual = &drawable->stvis; - stfbi->flush_front = dri_st_framebuffer_flush_front; - stfbi->validate = dri_st_framebuffer_validate; - stfbi->st_manager_private = (void *) drawable; - } - - return stfbi; -} - -/** - * Destroy a framebuffer. 
- */ -void -dri_destroy_st_framebuffer(struct st_framebuffer_iface *stfbi) -{ - struct dri_drawable *drawable = - (struct dri_drawable *) stfbi->st_manager_private; - int i; - - for (i = 0; i < ST_ATTACHMENT_COUNT; i++) - pipe_resource_reference(&drawable->textures[i], NULL); - - FREE(stfbi); -} - -/** - * Validate the texture at an attachment. Allocate the texture if it does not - * exist. - */ -void -dri_st_framebuffer_validate_att(struct st_framebuffer_iface *stfbi, - enum st_attachment_type statt) -{ - struct dri_drawable *drawable = - (struct dri_drawable *) stfbi->st_manager_private; - enum st_attachment_type statts[ST_ATTACHMENT_COUNT]; - unsigned i, count = 0; - - /* check if buffer already exists */ - if (drawable->texture_mask & (1 << statt)) - return; - - /* make sure DRI2 does not destroy existing buffers */ - for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { - if (drawable->texture_mask & (1 << i)) { - statts[count++] = i; - } - } - statts[count++] = statt; - - drawable->texture_stamp = drawable->dPriv->lastStamp - 1; - - stfbi->validate(stfbi, statts, count, NULL); -} - -/** - * Reference counted st_api. - */ -static struct { - int32_t refcnt; - struct st_api *stapi; -} dri_st_api; - -/** - * Add a reference to the st_api of the state tracker. - */ -static void -_dri_get_st_api(void) -{ - p_atomic_inc(&dri_st_api.refcnt); - if (p_atomic_read(&dri_st_api.refcnt) == 1) - dri_st_api.stapi = st_manager_create_api(); -} - -/** - * Remove a reference to the st_api of the state tracker. 
- */ -static void -_dri_put_st_api(void) -{ - struct st_api *stapi = dri_st_api.stapi; - - if (p_atomic_dec_zero(&dri_st_api.refcnt)) { - stapi->destroy(dri_st_api.stapi); - dri_st_api.stapi = NULL; - } -} - -static boolean -dri_st_manager_get_egl_image(struct st_manager *smapi, - struct st_egl_image *stimg) -{ - __DRIimage *img = NULL; - -#ifndef __NOT_HAVE_DRM_H - if (!__dri1_api_hooks) { - struct dri_context *ctx = (struct dri_context *) - stimg->stctxi->st_manager_private; - img = dri2_lookup_egl_image(ctx, stimg->egl_image); - } -#endif - if (!img) - return FALSE; - - stimg->texture = NULL; - pipe_resource_reference(&stimg->texture, img->texture); - stimg->face = img->face; - stimg->level = img->level; - stimg->zslice = img->zslice; - - return TRUE; -} - -/** - * Create a state tracker manager from the given screen. - */ -struct st_manager * -dri_create_st_manager(struct dri_screen *screen) -{ - struct st_manager *smapi; - - smapi = CALLOC_STRUCT(st_manager); - if (smapi) { - smapi->screen = screen->pipe_screen; - smapi->get_egl_image = dri_st_manager_get_egl_image; - _dri_get_st_api(); - } - - return smapi; -} - -/** - * Destroy a state tracker manager. - */ -void -dri_destroy_st_manager(struct st_manager *smapi) -{ - _dri_put_st_api(); - FREE(smapi); -} - -/** - * Return the st_api of OpenGL state tracker. - */ -struct st_api * -dri_get_st_api(void) -{ - assert(dri_st_api.stapi); - return dri_st_api.stapi; -} diff --git a/src/gallium/state_trackers/dri/common/dri_st_api.h b/src/gallium/state_trackers/dri/common/dri_st_api.h deleted file mode 100644 index 11d86cfbdf..0000000000 --- a/src/gallium/state_trackers/dri/common/dri_st_api.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Mesa 3-D graphics library - * Version: 7.9 - * - * Copyright (C) 2010 LunarG Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chia-I Wu <olv@lunarg.com> - */ - -#ifndef _DRI_ST_API_H_ -#define _DRI_ST_API_H_ - -#include "state_tracker/st_api.h" - -struct dri_screen; -struct dri_drawable; - -struct __DRIimageRec { - struct pipe_resource *texture; - unsigned face; - unsigned level; - unsigned zslice; - - void *loader_private; -}; - -struct st_api * -dri_get_st_api(void); - -struct st_manager * -dri_create_st_manager(struct dri_screen *screen); - -void -dri_destroy_st_manager(struct st_manager *smapi); - -struct st_framebuffer_iface * -dri_create_st_framebuffer(struct dri_drawable *drawable); - -void -dri_destroy_st_framebuffer(struct st_framebuffer_iface *stfbi); - -void -dri_st_framebuffer_validate_att(struct st_framebuffer_iface *stfbi, - enum st_attachment_type statt); - -#endif /* _DRI_ST_API_H_ */ diff --git a/src/gallium/state_trackers/dri/drm/Makefile b/src/gallium/state_trackers/dri/drm/Makefile index 7a236da0c0..d9a973e3c3 100644 --- a/src/gallium/state_trackers/dri/drm/Makefile +++ b/src/gallium/state_trackers/dri/drm/Makefile @@ -16,7 +16,6 @@ C_SOURCES = \ dri_context.c \ dri_screen.c \ dri_drawable.c \ - dri_st_api.c \ dri1_helper.c \ dri1.c \ dri2.c diff --git a/src/gallium/state_trackers/dri/drm/SConscript b/src/gallium/state_trackers/dri/drm/SConscript index 1dfaa402f2..8800b65534 100644 --- a/src/gallium/state_trackers/dri/drm/SConscript +++ b/src/gallium/state_trackers/dri/drm/SConscript @@ -20,7 +20,6 @@ if env['dri']: source = [ 'dri_context.c', 'dri_drawable.c', 'dri_screen.c', - 'dri_st_api.c', 'dri1_helper.c', 'dri1.c', 'dri2.c', diff --git a/src/gallium/state_trackers/dri/drm/dri1.c b/src/gallium/state_trackers/dri/drm/dri1.c index e216e46a87..23c21ed839 100644 --- a/src/gallium/state_trackers/dri/drm/dri1.c +++ b/src/gallium/state_trackers/dri/drm/dri1.c @@ -104,13 +104,13 @@ dri1_propagate_drawable_change(struct dri_context *ctx) if (dPriv && draw->texture_stamp != dPriv->lastStamp) { ctx->st->flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); 
flushed = TRUE; - ctx->st->notify_invalid_framebuffer(ctx->st, draw->stfb); + ctx->st->notify_invalid_framebuffer(ctx->st, &draw->base); } if (rPriv && dPriv != rPriv && read->texture_stamp != rPriv->lastStamp) { if (!flushed) ctx->st->flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); - ctx->st->notify_invalid_framebuffer(ctx->st, read->stfb); + ctx->st->notify_invalid_framebuffer(ctx->st, &read->base); } } @@ -253,13 +253,13 @@ dri1_copy_to_front(struct dri_context *ctx, * Backend functions for st_framebuffer interface and swap_buffers. */ -void +static void dri1_flush_frontbuffer(struct dri_drawable *draw, enum st_attachment_type statt) { - struct dri_context *ctx = dri_get_current(); + struct dri_context *ctx = dri_get_current(draw->sPriv); struct dri_screen *screen = dri_screen(draw->sPriv); - struct pipe_screen *pipe_screen = screen->pipe_screen; + struct pipe_screen *pipe_screen = screen->base.screen; struct pipe_fence_handle *dummy_fence; struct pipe_resource *ptex; @@ -280,10 +280,10 @@ dri1_flush_frontbuffer(struct dri_drawable *draw, void dri1_swap_buffers(__DRIdrawable * dPriv) { - struct dri_context *ctx = dri_get_current(); struct dri_drawable *draw = dri_drawable(dPriv); + struct dri_context *ctx = dri_get_current(draw->sPriv); struct dri_screen *screen = dri_screen(draw->sPriv); - struct pipe_screen *pipe_screen = screen->pipe_screen; + struct pipe_screen *pipe_screen = screen->base.screen; struct pipe_fence_handle *fence; struct pipe_resource *ptex; @@ -309,9 +309,9 @@ dri1_swap_buffers(__DRIdrawable * dPriv) void dri1_copy_sub_buffer(__DRIdrawable * dPriv, int x, int y, int w, int h) { - struct dri_context *ctx = dri_get_current(); + struct dri_context *ctx = dri_get_current(dPriv->driScreenPriv); struct dri_screen *screen = dri_screen(dPriv->driScreenPriv); - struct pipe_screen *pipe_screen = screen->pipe_screen; + struct pipe_screen *pipe_screen = screen->base.screen; struct drm_clip_rect sub_bbox; struct dri_drawable *draw = dri_drawable(dPriv); 
struct pipe_fence_handle *dummy_fence; @@ -342,9 +342,10 @@ dri1_copy_sub_buffer(__DRIdrawable * dPriv, int x, int y, int w, int h) * as they are requested. Unused attachments are not removed, not until the * framebuffer is resized or destroyed. */ -void +static void dri1_allocate_textures(struct dri_drawable *drawable, - unsigned mask) + const enum st_attachment_type *statts, + unsigned count) { struct dri_screen *screen = dri_screen(drawable->sPriv); struct pipe_resource templ; @@ -371,40 +372,24 @@ dri1_allocate_textures(struct dri_drawable *drawable, templ.depth0 = 1; templ.last_level = 0; - for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { + for (i = 0; i < count; i++) { enum pipe_format format; - unsigned tex_usage; + unsigned bind; - /* the texture already exists or not requested */ - if (drawable->textures[i] || !(mask & (1 << i))) { + /* the texture already exists */ + if (drawable->textures[statts[i]]) continue; - } - switch (i) { - case ST_ATTACHMENT_FRONT_LEFT: - case ST_ATTACHMENT_BACK_LEFT: - case ST_ATTACHMENT_FRONT_RIGHT: - case ST_ATTACHMENT_BACK_RIGHT: - format = drawable->stvis.color_format; - tex_usage = PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_RENDER_TARGET; - break; - case ST_ATTACHMENT_DEPTH_STENCIL: - format = drawable->stvis.depth_stencil_format; - tex_usage = PIPE_BIND_DEPTH_STENCIL; - break; - default: - format = PIPE_FORMAT_NONE; - break; - } + dri_drawable_get_format(drawable, statts[i], &format, &bind); + + if (format == PIPE_FORMAT_NONE) + continue; - if (format != PIPE_FORMAT_NONE) { - templ.format = format; - templ.bind = tex_usage; + templ.format = format; + templ.bind = bind; - drawable->textures[i] = - screen->pipe_screen->resource_create(screen->pipe_screen, &templ); - } + drawable->textures[statts[i]] = + screen->base.screen->resource_create(screen->base.screen, &templ); } drawable->old_w = width; @@ -489,6 +474,8 @@ dri1_init_screen(__DRIscreen * sPriv) screen->sPriv = sPriv; screen->fd = sPriv->fd; screen->drmLock = (drmLock *) & 
sPriv->pSAREA->lock; + screen->allocate_textures = dri1_allocate_textures; + screen->flush_frontbuffer = dri1_flush_frontbuffer; sPriv->private = (void *)screen; sPriv->extensions = dri1_screen_extensions; diff --git a/src/gallium/state_trackers/dri/drm/dri1.h b/src/gallium/state_trackers/dri/drm/dri1.h index f7441f98ab..a50188b368 100644 --- a/src/gallium/state_trackers/dri/drm/dri1.h +++ b/src/gallium/state_trackers/dri/drm/dri1.h @@ -43,14 +43,6 @@ extern struct dri1_api *__dri1_api_hooks; const __DRIconfig ** dri1_init_screen(__DRIscreen * sPriv); -void -dri1_flush_frontbuffer(struct dri_drawable *drawable, - enum st_attachment_type statt); - -void -dri1_allocate_textures(struct dri_drawable *drawable, - unsigned mask); - void dri1_swap_buffers(__DRIdrawable * dPriv); void diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 0d15b5c9b8..e1216f14c0 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -38,9 +38,10 @@ #include "dri_screen.h" #include "dri_context.h" #include "dri_drawable.h" -#include "dri_st_api.h" #include "dri2.h" +#include "GL/internal/dri_interface.h" + /** * DRI2 flush extension. 
*/ @@ -59,7 +60,7 @@ dri2_invalidate_drawable(__DRIdrawable *dPriv) drawable->dPriv->lastStamp = *drawable->dPriv->pStamp; if (ctx) - ctx->st->notify_invalid_framebuffer(ctx->st, drawable->stfb); + ctx->st->notify_invalid_framebuffer(ctx->st, &drawable->base); } static const __DRI2flushExtension dri2FlushExtension = { @@ -79,7 +80,7 @@ dri2_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, struct dri_drawable *drawable = dri_drawable(dPriv); struct pipe_resource *pt; - dri_st_framebuffer_validate_att(drawable->stfb, ST_ATTACHMENT_FRONT_LEFT); + dri_drawable_validate_att(drawable, ST_ATTACHMENT_FRONT_LEFT); pt = drawable->textures[ST_ATTACHMENT_FRONT_LEFT]; @@ -120,30 +121,31 @@ static const __DRItexBufferExtension dri2TexBufferExtension = { }; /** - * Get the format of an attachment. + * Get the format and binding of an attachment. */ -static INLINE enum pipe_format +static INLINE void dri2_drawable_get_format(struct dri_drawable *drawable, - enum st_attachment_type statt) + enum st_attachment_type statt, + enum pipe_format *format, + unsigned *bind) { - enum pipe_format format; - switch (statt) { case ST_ATTACHMENT_FRONT_LEFT: case ST_ATTACHMENT_BACK_LEFT: case ST_ATTACHMENT_FRONT_RIGHT: case ST_ATTACHMENT_BACK_RIGHT: - format = drawable->stvis.color_format; + *format = drawable->stvis.color_format; + *bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; break; case ST_ATTACHMENT_DEPTH_STENCIL: - format = drawable->stvis.depth_stencil_format; + *format = drawable->stvis.depth_stencil_format; + *bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? 
*/ break; default: - format = PIPE_FORMAT_NONE; + *format = PIPE_FORMAT_NONE; + *bind = 0; break; } - - return format; } @@ -174,9 +176,10 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable, for (i = 0; i < *count; i++) { enum pipe_format format; + unsigned bind; int att, bpp; - format = dri2_drawable_get_format(drawable, statts[i]); + dri2_drawable_get_format(drawable, statts[i], &format, &bind); if (format == PIPE_FORMAT_NONE) continue; @@ -263,7 +266,7 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable, struct pipe_resource templ; struct winsys_handle whandle; boolean have_depth = FALSE; - unsigned i; + unsigned i, bind; if (drawable->old_num == count && drawable->old_w == dri_drawable->w && @@ -275,7 +278,6 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable, pipe_resource_reference(&drawable->textures[i], NULL); memset(&templ, 0, sizeof(templ)); - templ.bind = PIPE_BIND_RENDER_TARGET; templ.target = PIPE_TEXTURE_2D; templ.last_level = 0; templ.width0 = dri_drawable->w; @@ -319,16 +321,17 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable, break; } - format = dri2_drawable_get_format(drawable, statt); + dri2_drawable_get_format(drawable, statt, &format, &bind); if (statt == ST_ATTACHMENT_INVALID || format == PIPE_FORMAT_NONE) continue; templ.format = format; + templ.bind = bind; whandle.handle = buf->name; whandle.stride = buf->pitch; drawable->textures[statt] = - screen->pipe_screen->resource_from_handle(screen->pipe_screen, + screen->base.screen->resource_from_handle(screen->base.screen, &templ, &whandle); } @@ -342,7 +345,7 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable, * Backend functions for st_framebuffer interface. 
*/ -void +static void dri2_allocate_textures(struct dri_drawable *drawable, const enum st_attachment_type *statts, unsigned count) @@ -354,7 +357,7 @@ dri2_allocate_textures(struct dri_drawable *drawable, dri2_drawable_process_buffers(drawable, buffers, num_buffers); } -void +static void dri2_flush_frontbuffer(struct dri_drawable *drawable, enum st_attachment_type statt) { @@ -369,7 +372,7 @@ dri2_flush_frontbuffer(struct dri_drawable *drawable, } } -__DRIimage * +static __DRIimage * dri2_lookup_egl_image(struct dri_context *ctx, void *handle) { __DRIimageLookupExtension *loader = ctx->sPriv->dri2.image; @@ -431,7 +434,7 @@ dri2_create_image_from_name(__DRIcontext *context, whandle.handle = name; whandle.stride = pitch * util_format_get_blocksize(pf); - img->texture = screen->pipe_screen->resource_from_handle(screen->pipe_screen, + img->texture = screen->base.screen->resource_from_handle(screen->base.screen, &templ, &whandle); if (!img->texture) { FREE(img); @@ -508,6 +511,9 @@ dri2_init_screen(__DRIscreen * sPriv) screen->api = drm_api_create(); screen->sPriv = sPriv; screen->fd = sPriv->fd; + screen->lookup_egl_image = dri2_lookup_egl_image; + screen->allocate_textures = dri2_allocate_textures; + screen->flush_frontbuffer = dri2_flush_frontbuffer; sPriv->private = (void *)screen; sPriv->extensions = dri_screen_extensions; diff --git a/src/gallium/state_trackers/dri/drm/dri2.h b/src/gallium/state_trackers/dri/drm/dri2.h index 5b28850000..07adfe4f6c 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.h +++ b/src/gallium/state_trackers/dri/drm/dri2.h @@ -34,16 +34,4 @@ const __DRIconfig ** dri2_init_screen(__DRIscreen * sPriv); -void -dri2_flush_frontbuffer(struct dri_drawable *drawable, - enum st_attachment_type statt); - -void -dri2_allocate_textures(struct dri_drawable *drawable, - const enum st_attachment_type *statts, - unsigned count); - -__DRIimage * -dri2_lookup_egl_image(struct dri_context *ctx, void *handle); - #endif /* DRI2_H */ diff --git 
a/src/gallium/state_trackers/dri/drm/dri_st_api.c b/src/gallium/state_trackers/dri/drm/dri_st_api.c deleted file mode 120000 index a8f6bd06b0..0000000000 --- a/src/gallium/state_trackers/dri/drm/dri_st_api.c +++ /dev/null @@ -1 +0,0 @@ -../common/dri_st_api.c
\ No newline at end of file diff --git a/src/gallium/state_trackers/dri/sw/Makefile b/src/gallium/state_trackers/dri/sw/Makefile index 18d7aabd9f..c0ae71451b 100644 --- a/src/gallium/state_trackers/dri/sw/Makefile +++ b/src/gallium/state_trackers/dri/sw/Makefile @@ -19,7 +19,6 @@ C_SOURCES = \ dri_context.c \ dri_screen.c \ dri_drawable.c \ - dri_st_api.c \ dri1_helper.c \ drisw.c diff --git a/src/gallium/state_trackers/dri/sw/SConscript b/src/gallium/state_trackers/dri/sw/SConscript index c97124c831..6bb282d1a4 100644 --- a/src/gallium/state_trackers/dri/sw/SConscript +++ b/src/gallium/state_trackers/dri/sw/SConscript @@ -20,7 +20,6 @@ if env['dri']: source = [ 'dri_context.c', 'dri_drawable.c', 'dri_screen.c', - 'dri_st_api.c', 'dri1_helper.c', 'drisw.c', ] diff --git a/src/gallium/state_trackers/dri/sw/dri_st_api.c b/src/gallium/state_trackers/dri/sw/dri_st_api.c deleted file mode 120000 index a8f6bd06b0..0000000000 --- a/src/gallium/state_trackers/dri/sw/dri_st_api.c +++ /dev/null @@ -1 +0,0 @@ -../common/dri_st_api.c
\ No newline at end of file diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index 9edddf01b5..dcf645593f 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -70,7 +70,7 @@ put_image(__DRIdrawable *dPriv, void *data, unsigned width, unsigned height) data, dPriv->loaderPrivate); } -void +static void drisw_update_drawable_info(struct dri_drawable *drawable) { __DRIdrawable *dPriv = drawable->dPriv; @@ -99,20 +99,20 @@ drisw_present_texture(__DRIdrawable *dPriv, if (!psurf) return; - screen->pipe_screen->flush_frontbuffer(screen->pipe_screen, psurf, drawable); + screen->base.screen->flush_frontbuffer(screen->base.screen, psurf, drawable); } static INLINE void drisw_invalidate_drawable(__DRIdrawable *dPriv) { - struct dri_context *ctx = dri_get_current(); + struct dri_context *ctx = dri_get_current(dPriv->driScreenPriv); struct dri_drawable *drawable = dri_drawable(dPriv); drawable->texture_stamp = dPriv->lastStamp - 1; /* check if swapping currently bound buffer */ if (ctx && ctx->dPriv == dPriv) - ctx->st->notify_invalid_framebuffer(ctx->st, drawable->stfb); + ctx->st->notify_invalid_framebuffer(ctx->st, &drawable->base); } static INLINE void @@ -131,7 +131,7 @@ drisw_copy_to_front(__DRIdrawable * dPriv, void drisw_swap_buffers(__DRIdrawable *dPriv) { - struct dri_context *ctx = dri_get_current(); + struct dri_context *ctx = dri_get_current(dPriv->driScreenPriv); struct dri_drawable *drawable = dri_drawable(dPriv); struct pipe_resource *ptex; @@ -147,11 +147,11 @@ drisw_swap_buffers(__DRIdrawable *dPriv) } } -void +static void drisw_flush_frontbuffer(struct dri_drawable *drawable, enum st_attachment_type statt) { - struct dri_context *ctx = dri_get_current(); + struct dri_context *ctx = dri_get_current(drawable->sPriv); struct pipe_resource *ptex; if (!ctx) @@ -175,9 +175,10 @@ drisw_flush_frontbuffer(struct dri_drawable *drawable, * seems a better seperation and 
safer for each DRI version to provide its own * function. */ -void +static void drisw_allocate_textures(struct dri_drawable *drawable, - unsigned mask) + const enum st_attachment_type *statts, + unsigned count) { struct dri_screen *screen = dri_screen(drawable->sPriv); struct pipe_resource templ; @@ -206,38 +207,25 @@ drisw_allocate_textures(struct dri_drawable *drawable, for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { enum pipe_format format; - unsigned tex_usage; + unsigned bind; /* the texture already exists or not requested */ - if (drawable->textures[i] || !(mask & (1 << i))) { + if (drawable->textures[statts[i]]) continue; - } - - switch (i) { - case ST_ATTACHMENT_FRONT_LEFT: - case ST_ATTACHMENT_BACK_LEFT: - case ST_ATTACHMENT_FRONT_RIGHT: - case ST_ATTACHMENT_BACK_RIGHT: - format = drawable->stvis.color_format; - tex_usage = PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_RENDER_TARGET; - break; - case ST_ATTACHMENT_DEPTH_STENCIL: - format = drawable->stvis.depth_stencil_format; - tex_usage = PIPE_BIND_DEPTH_STENCIL; - break; - default: - format = PIPE_FORMAT_NONE; - break; - } - - if (format != PIPE_FORMAT_NONE) { - templ.format = format; - templ.bind = tex_usage; - - drawable->textures[i] = - screen->pipe_screen->resource_create(screen->pipe_screen, &templ); - } + + dri_drawable_get_format(drawable, statts[i], &format, &bind); + + if (statts[i] != ST_ATTACHMENT_DEPTH_STENCIL) + bind |= PIPE_BIND_DISPLAY_TARGET; + + if (format == PIPE_FORMAT_NONE) + continue; + + templ.format = format; + templ.bind = bind; + + drawable->textures[statts[i]] = + screen->base.screen->resource_create(screen->base.screen, &templ); } drawable->old_w = width; @@ -270,6 +258,9 @@ drisw_init_screen(__DRIscreen * sPriv) screen->api = NULL; /* not needed */ screen->sPriv = sPriv; screen->fd = -1; + screen->allocate_textures = drisw_allocate_textures; + screen->update_drawable_info = drisw_update_drawable_info; + screen->flush_frontbuffer = drisw_flush_frontbuffer; sPriv->private = (void *)screen; 
sPriv->extensions = drisw_screen_extensions; diff --git a/src/gallium/state_trackers/dri/sw/drisw.h b/src/gallium/state_trackers/dri/sw/drisw.h index c0c874f732..6c6c891f35 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.h +++ b/src/gallium/state_trackers/dri/sw/drisw.h @@ -38,17 +38,6 @@ const __DRIconfig ** drisw_init_screen(__DRIscreen * sPriv); -void -drisw_update_drawable_info(struct dri_drawable *drawable); - -void -drisw_flush_frontbuffer(struct dri_drawable *drawable, - enum st_attachment_type statt); - -void -drisw_allocate_textures(struct dri_drawable *drawable, - unsigned mask); - void drisw_swap_buffers(__DRIdrawable * dPriv); #endif /* DRISW_H */ diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c index 57a479f6bc..9744547868 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c @@ -49,41 +49,39 @@ egl_g3d_st_manager(struct st_manager *smapi) struct st_api * egl_g3d_create_st_api(enum st_api_type api) { - const char *stmod_name; struct util_dl_library *lib; - const struct st_module *mod; + const char *proc_name; + struct st_api * (*proc)(void) = NULL; switch (api) { case ST_API_OPENGL: - stmod_name = ST_MODULE_OPENGL_SYMBOL; + proc_name = ST_CREATE_OPENGL_SYMBOL; break; case ST_API_OPENGL_ES1: - stmod_name = ST_MODULE_OPENGL_ES1_SYMBOL; + proc_name = ST_CREATE_OPENGL_ES1_SYMBOL; break; case ST_API_OPENGL_ES2: - stmod_name = ST_MODULE_OPENGL_ES2_SYMBOL; + proc_name = ST_CREATE_OPENGL_ES2_SYMBOL; break; case ST_API_OPENVG: - stmod_name = ST_MODULE_OPENVG_SYMBOL; + proc_name = ST_CREATE_OPENVG_SYMBOL; break; default: - stmod_name = NULL; - break; - } - if (!stmod_name) + assert(!"Unknown API Type\n"); return NULL; + } - mod = NULL; lib = util_dl_open(NULL); if (lib) { - mod = (const struct st_module *) - util_dl_get_proc_address(lib, stmod_name); + proc = util_dl_get_proc_address(lib, proc_name); + debug_printf("%s: %s 
%p\n", __func__, proc_name, proc); util_dl_close(lib); } - if (!mod || mod->api != api) + + if (!proc) return NULL; - return mod->create_api(); + return proc(); } static boolean diff --git a/src/gallium/state_trackers/es/st_es1.c b/src/gallium/state_trackers/es/st_es1.c index 4e89e06b34..825fdac215 100644 --- a/src/gallium/state_trackers/es/st_es1.c +++ b/src/gallium/state_trackers/es/st_es1.c @@ -1,8 +1,7 @@ -#include "state_tracker/st_manager.h" +#include "state_tracker/st_gl_api.h" -PUBLIC const int st_api_OpenGL_ES1 = 1; - -PUBLIC const struct st_module st_module_OpenGL_ES1 = { - .api = ST_API_OPENGL_ES1, - .create_api = st_manager_create_api -}; +PUBLIC struct st_api * +st_api_create_OpenGL_ES1() +{ + return st_gl_api_create(); +} diff --git a/src/gallium/state_trackers/es/st_es2.c b/src/gallium/state_trackers/es/st_es2.c index 82e88b176a..5c773aaf93 100644 --- a/src/gallium/state_trackers/es/st_es2.c +++ b/src/gallium/state_trackers/es/st_es2.c @@ -1,8 +1,8 @@ -#include "state_tracker/st_manager.h" +#include "state_tracker/st_gl_api.h" -PUBLIC const int st_api_OpenGL_ES2 = 1; - -PUBLIC const struct st_module st_module_OpenGL_ES2 = { - .api = ST_API_OPENGL_ES2, - .create_api = st_manager_create_api -}; +PUBLIC struct st_api * +st_api_create_OpenGL_ES2() +{ + /* linker magic creates different versions */ + return st_gl_api_create(); +} diff --git a/src/gallium/state_trackers/glx/xlib/xm_st.c b/src/gallium/state_trackers/glx/xlib/xm_st.c index 294b593bf7..1c678b4f76 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_st.c +++ b/src/gallium/state_trackers/glx/xlib/xm_st.c @@ -125,7 +125,7 @@ xmesa_st_framebuffer_copy_textures(struct st_framebuffer_iface *stfbi, /** * Remove outdated textures and create the requested ones. 
*/ -static void +static boolean xmesa_st_framebuffer_validate_textures(struct st_framebuffer_iface *stfbi, unsigned width, unsigned height, unsigned mask) @@ -183,12 +183,16 @@ xmesa_st_framebuffer_validate_textures(struct st_framebuffer_iface *stfbi, xstfb->textures[i] = xstfb->screen->resource_create(xstfb->screen, &templ); + if (!xstfb->textures[i]) + return FALSE; } } xstfb->texture_width = width; xstfb->texture_height = height; xstfb->texture_mask = mask; + + return TRUE; } static boolean @@ -200,6 +204,7 @@ xmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, struct xmesa_st_framebuffer *xstfb = xmesa_st_framebuffer(stfbi); unsigned statt_mask, new_mask, i; boolean resized; + boolean ret; statt_mask = 0x0; for (i = 0; i < count; i++) @@ -212,8 +217,10 @@ xmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, /* revalidate textures */ if (resized || new_mask) { - xmesa_st_framebuffer_validate_textures(stfbi, - xstfb->buffer->width, xstfb->buffer->height, statt_mask); + ret = xmesa_st_framebuffer_validate_textures(stfbi, + xstfb->buffer->width, xstfb->buffer->height, statt_mask); + if (!ret) + return ret; if (!resized) { enum st_attachment_type back, front; diff --git a/src/gallium/state_trackers/vega/vg_manager.c b/src/gallium/state_trackers/vega/vg_manager.c index e4226754d1..aecac28e7e 100644 --- a/src/gallium/state_trackers/vega/vg_manager.c +++ b/src/gallium/state_trackers/vega/vg_manager.c @@ -546,26 +546,17 @@ vg_api_destroy(struct st_api *stapi) free(stapi); } -static struct st_api * -vg_module_create_api(void) -{ - struct st_api *stapi; - - stapi = CALLOC_STRUCT(st_api); - if (stapi) { - stapi->destroy = vg_api_destroy; - stapi->get_proc_address = vg_api_get_proc_address; - stapi->is_visual_supported = vg_api_is_visual_supported; - - stapi->create_context = vg_api_create_context; - stapi->make_current = vg_api_make_current; - stapi->get_current = vg_api_get_current; - } +struct st_api st_vg_api = { + vg_api_destroy, + 
vg_api_get_proc_address, + vg_api_is_visual_supported, + vg_api_create_context, + vg_api_make_current, + vg_api_get_current, +}; - return stapi; +struct st_api * +st_api_create_OpenVG(void) +{ + return &st_vg_api; } - -PUBLIC const struct st_module st_module_OpenVG = { - .api = ST_API_OPENVG, - .create_api = vg_module_create_api, -}; diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c index f4ea61ed2c..bcdd82e4f6 100644 --- a/src/gallium/state_trackers/wgl/stw_st.c +++ b/src/gallium/state_trackers/wgl/stw_st.c @@ -27,7 +27,7 @@ #include "util/u_memory.h" #include "util/u_inlines.h" -#include "state_tracker/st_manager.h" /* for st_manager_create_api */ +#include "state_tracker/st_gl_api.h" /* for st_gl_api_create */ #include "stw_st.h" #include "stw_device.h" @@ -308,5 +308,5 @@ stw_st_swap_framebuffer_locked(struct st_framebuffer_iface *stfb) struct st_api * stw_st_create_api(void) { - return st_manager_create_api(); + return st_gl_api_create(); } diff --git a/src/gallium/targets/Makefile.dri b/src/gallium/targets/Makefile.dri index 8efbf4e828..3cbaf615e2 100644 --- a/src/gallium/targets/Makefile.dri +++ b/src/gallium/targets/Makefile.dri @@ -1,5 +1,14 @@ # -*-makefile-*- +ifeq ($(MESA_LLVM),1) +DRIVER_DEFINES += -DGALLIUM_LLVMPIPE +PIPE_DRIVERS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +LDFLAGS += $(LLVM_LDFLAGS) +LD = g++ +DRIVER_EXTRAS = $(LLVM_LIBS) +USE_CXX=1 +endif + MESA_MODULES = \ $(TOP)/src/mesa/libmesagallium.a \ $(GALLIUM_AUXILIARIES) @@ -69,7 +78,11 @@ $(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) Makefile \ $(OBJECTS) $(PIPE_DRIVERS) \ -Wl,--start-group $(MESA_MODULES) -Wl,--end-group \ $(DRI_LIB_DEPS) $(DRIVER_EXTRAS) - $(CC) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS) + if [ "x${USE_CXX}" == "x" ]; then \ + $(CC) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS); \ + else \ + $(CXX) $(CFLAGS) -o $@.test 
$(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS); \ + fi @rm -f $@.test mv -f $@.tmp $@ diff --git a/src/gallium/targets/dri-swrast/Makefile b/src/gallium/targets/dri-swrast/Makefile index fcfd690e43..3db9781c20 100644 --- a/src/gallium/targets/dri-swrast/Makefile +++ b/src/gallium/targets/dri-swrast/Makefile @@ -8,7 +8,8 @@ DRIVER_DEFINES = -D__NOT_HAVE_DRM_H -DGALLIUM_SOFTPIPE PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/sw/libdrisw.a \ $(TOP)/src/gallium/winsys/sw/dri/libswdri.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a SWRAST_COMMON_GALLIUM_SOURCES = \ $(TOP)/src/mesa/drivers/dri/common/utils.c \ diff --git a/src/gallium/targets/dri-swrast/swrast_drm_api.c b/src/gallium/targets/dri-swrast/swrast_drm_api.c index e8d6d8069c..84142be80c 100644 --- a/src/gallium/targets/dri-swrast/swrast_drm_api.c +++ b/src/gallium/targets/dri-swrast/swrast_drm_api.c @@ -31,6 +31,7 @@ #include "state_tracker/drm_api.h" #include "state_tracker/sw_winsys.h" #include "dri_sw_winsys.h" +#include "trace/tr_public.h" /* Copied from targets/libgl-xlib */ @@ -80,7 +81,7 @@ swrast_create_screen(struct sw_winsys *winsys) screen = softpipe_create_screen( winsys ); #endif - return screen; + return trace_screen_create(screen);; } struct pipe_screen * diff --git a/src/gallium/targets/libgl-xlib/xlib.c b/src/gallium/targets/libgl-xlib/xlib.c index 48e5bdff42..69b4ddd33f 100644 --- a/src/gallium/targets/libgl-xlib/xlib.c +++ b/src/gallium/targets/libgl-xlib/xlib.c @@ -36,15 +36,15 @@ #include "state_tracker/xlib_sw_winsys.h" #include "xm_public.h" -#include "state_tracker/st_manager.h" +#include "state_tracker/st_gl_api.h" -/* advertise OpenGL support */ -PUBLIC const int st_api_OpenGL = 1; +/* piggy back on this libGL for OpenGL support in EGL */ +struct st_api * +st_api_create_OpenGL() +{ + return st_gl_api_create(); +} -PUBLIC const struct st_module 
st_module_OpenGL = { - .api = ST_API_OPENGL, - .create_api = st_manager_create_api -}; /* Helper function to choose and instantiate one of the software rasterizers: * cell, llvmpipe, softpipe. @@ -151,7 +151,7 @@ fail: static struct xm_driver xlib_driver = { .create_pipe_screen = swrast_xlib_create_screen, - .create_st_api = st_manager_create_api, + .create_st_api = st_gl_api_create, }; diff --git a/src/gallium/winsys/i915/sw/i915_sw_buffer.c b/src/gallium/winsys/i915/sw/i915_sw_buffer.c index 9a27da5e1a..df17568886 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_buffer.c +++ b/src/gallium/winsys/i915/sw/i915_sw_buffer.c @@ -27,7 +27,7 @@ i915_sw_buffer_create(struct i915_winsys *iws, buf->magic = 0xDEAD1337; buf->name = name; buf->type = type; - buf->ptr = calloc(size, 1); + buf->ptr = CALLOC(size, 1); if (!buf->ptr) goto err; diff --git a/src/gallium/winsys/i965/drm/SConscript b/src/gallium/winsys/i965/drm/SConscript index 150ab19a33..abf9aac5c0 100644 --- a/src/gallium/winsys/i965/drm/SConscript +++ b/src/gallium/winsys/i965/drm/SConscript @@ -2,6 +2,8 @@ Import('*') env = env.Clone() +env.ParseConfig('pkg-config --cflags libdrm') + i965drm_sources = [ 'i965_drm_api.c', 'i965_drm_buffer.c', diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript index fab4292951..60e409fe10 100644 --- a/src/gallium/winsys/radeon/drm/SConscript +++ b/src/gallium/winsys/radeon/drm/SConscript @@ -8,6 +8,7 @@ radeon_sources = [ 'radeon_r300.c', ] +env.ParseConfig('pkg-config --cflags libdrm_radeon') env.Append(CPPPATH = '#/src/gallium/drivers/r300') radeonwinsys = env.ConvenienceLibrary( diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index 8c22738004..8d981b22e3 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -100,8 +100,8 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) version->version_minor >= 1; #endif - 
/* XXX */ - winsys->tex3d_mip_bug = TRUE; + winsys->drm_2_3_0 = version->version_major > 2 || + version->version_minor >= 3; info.request = RADEON_INFO_DEVICE_ID; retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index 9824ada5b3..b836649892 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -22,6 +22,8 @@ struct radeon_drm_buffer { boolean flinked; uint32_t flink; + uint32_t tileflags; + uint32_t pitch; struct radeon_drm_buffer *next, *prev; }; @@ -318,6 +320,9 @@ void radeon_drm_bufmgr_get_tiling(struct pb_buffer *_buf, radeon_bo_get_tiling(buf->bo, &flags, &pitch); + buf->tileflags = flags; + buf->pitch = pitch; + *microtiled = R300_BUFFER_LINEAR; *macrotiled = R300_BUFFER_LINEAR; if (flags & RADEON_BO_FLAGS_MICRO_TILE) @@ -333,7 +338,7 @@ void radeon_drm_bufmgr_set_tiling(struct pb_buffer *_buf, uint32_t pitch) { struct radeon_drm_buffer *buf = get_drm_buffer(_buf); - uint32_t flags = 0, old_flags, old_pitch; + uint32_t flags = 0; if (microtiled == R300_BUFFER_TILED) flags |= RADEON_BO_FLAGS_MICRO_TILE; /* XXX Remove this ifdef when libdrm version 2.4.19 becomes mandatory. */ @@ -344,17 +349,15 @@ void radeon_drm_bufmgr_set_tiling(struct pb_buffer *_buf, if (macrotiled == R300_BUFFER_TILED) flags |= RADEON_BO_FLAGS_MACRO_TILE; - radeon_bo_get_tiling(buf->bo, &old_flags, &old_pitch); - - if (flags != old_flags || pitch != old_pitch) { + if (flags != buf->tileflags || pitch != buf->pitch) { /* Tiling determines how DRM treats the buffer data. * We must flush CS when changing it if the buffer is referenced. 
*/ if (radeon_bo_is_referenced_by_cs(buf->bo, buf->mgr->rws->cs)) { buf->mgr->rws->flush_cb(buf->mgr->rws->flush_data); } - } - radeon_bo_set_tiling(buf->bo, flags, pitch); + radeon_bo_set_tiling(buf->bo, flags, pitch); + } } boolean radeon_drm_bufmgr_add_buffer(struct pb_buffer *_buf, diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 2fcf7cf982..80923de937 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -201,6 +201,13 @@ static void radeon_write_cs_dword(struct r300_winsys_screen *rws, radeon_cs_write_dword(ws->cs, dword); } +static void radeon_write_cs_table(struct r300_winsys_screen *rws, + const void *table, unsigned count) +{ + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + radeon_cs_write_table(ws->cs, table, count); +} + static void radeon_write_cs_reloc(struct r300_winsys_screen *rws, struct r300_winsys_buffer *buf, uint32_t rd, @@ -265,8 +272,8 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->z_pipes; case R300_VID_SQUARE_TILING_SUPPORT: return ws->squaretiling; - case R300_VID_TEX3D_MIP_BUG: - return ws->tex3d_mip_bug; + case R300_VID_DRM_2_3_0: + return ws->drm_2_3_0; } return 0; } @@ -322,6 +329,7 @@ radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) ws->base.check_cs = radeon_check_cs; ws->base.begin_cs = radeon_begin_cs; ws->base.write_cs_dword = radeon_write_cs_dword; + ws->base.write_cs_table = radeon_write_cs_table; ws->base.write_cs_reloc = radeon_write_cs_reloc; ws->base.end_cs = radeon_end_cs; ws->base.flush_cs = radeon_flush_cs; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 396f258c31..ca789be8e9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -60,8 +60,12 @@ struct radeon_libdrm_winsys { /* Square tiling support. 
*/ boolean squaretiling; - /* Square tiling support. */ - boolean tex3d_mip_bug; + /* DRM 2.3.0 + * - R500 VAP regs + * - MSPOS regs + * - Fixed texture 3D size calculation + */ + boolean drm_2_3_0; /* DRM FD */ int fd; diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c index 90ffc4868f..104d03f273 100644 --- a/src/gallium/winsys/svga/drm/vmw_context.c +++ b/src/gallium/winsys/svga/drm/vmw_context.c @@ -114,6 +114,19 @@ vmw_svga_winsys_context(struct svga_winsys_context *swc) } +static INLINE unsigned +vmw_translate_to_pb_flags(unsigned flags) +{ + unsigned f = 0; + if (flags & SVGA_RELOC_READ) + f |= PB_USAGE_GPU_READ; + + if (flags & SVGA_RELOC_WRITE) + f |= PB_USAGE_GPU_WRITE; + + return f; +} + static enum pipe_error vmw_swc_flush(struct svga_winsys_context *swc, struct pipe_fence_handle **pfence) @@ -264,6 +277,7 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc, { struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); struct vmw_region_relocation *reloc; + unsigned translated_flags; enum pipe_error ret; assert(vswc->region.staged < vswc->region.reserved); @@ -275,7 +289,8 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc, ++vswc->region.staged; - ret = pb_validate_add_buffer(vswc->validate, reloc->buffer, flags); + translated_flags = vmw_translate_to_pb_flags(flags); + ret = pb_validate_add_buffer(vswc->validate, reloc->buffer, translated_flags); /* TODO: Update pipebuffer to reserve buffers and not fail here */ assert(ret == PIPE_OK); diff --git a/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c b/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c index ec4f919d08..56d2df825d 100644 --- a/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c +++ b/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c @@ -48,6 +48,8 @@ #include <sys/shm.h> #include <X11/extensions/XShm.h> +DEBUG_GET_ONCE_BOOL_OPTION(xlib_no_shm, "XLIB_NO_SHM", FALSE) + /** * Display target for Xlib winsys. 
* Low-level OS/window system memory buffer @@ -122,6 +124,9 @@ static char *alloc_shm(struct xm_displaytarget *buf, unsigned size) { XShmSegmentInfo *const shminfo = & buf->shminfo; + shminfo->shmid = -1; + shminfo->shmaddr = (char *) -1; + shminfo->shmid = shmget(IPC_PRIVATE, size, IPC_CREAT|0777); if (shminfo->shmid < 0) { return NULL; @@ -383,15 +388,11 @@ xm_displaytarget_create(struct sw_winsys *winsys, xm_dt->stride = align(util_format_get_stride(format, width), alignment); size = xm_dt->stride * nblocksy; - if (!debug_get_bool_option("XLIB_NO_SHM", FALSE)) - { - xm_dt->shminfo.shmid = -1; - xm_dt->shminfo.shmaddr = (char *) -1; - xm_dt->shm = TRUE; - + if (!debug_get_option_xlib_no_shm()) { xm_dt->data = alloc_shm(xm_dt, size); - if(!xm_dt->data) - goto no_data; + if (xm_dt->data) { + xm_dt->shm = TRUE; + } } if(!xm_dt->data) { |