diff options
Diffstat (limited to 'src/gallium/drivers')
96 files changed, 1580 insertions, 1285 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index d2166a4901..d7788bd9bb 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1622,14 +1622,6 @@ exec_instruction( *pc = -1; break; - case TGSI_OPCODE_REP: - ASSERT (0); - break; - - case TGSI_OPCODE_ENDREP: - ASSERT (0); - break; - case TGSI_OPCODE_PUSHA: ASSERT (0); break; @@ -1743,8 +1735,6 @@ exec_instruction( mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; break; - case TGSI_OPCODE_BGNFOR: - /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); @@ -1753,8 +1743,6 @@ exec_instruction( mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; - case TGSI_OPCODE_ENDFOR: - /* fall-through (for now at least) */ case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ ASSERT(mach->ContStackTop > 0); diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 236c50f4d9..9515cd8938 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -39,7 +39,7 @@ static void failover_destroy( struct pipe_context *pipe ) { struct failover_context *failover = failover_context( pipe ); - free( failover ); + FREE( failover ); } diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index b682ce6750..272e683067 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -29,6 +29,7 @@ */ #include "util/u_inlines.h" +#include "util/u_memory.h" #include "fo_context.h" @@ -53,7 +54,7 @@ static void * failover_create_blend_state( struct pipe_context *pipe, const struct pipe_blend_state *blend ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_blend_state(failover->sw, blend); @@ -85,7 +86,7 @@ failover_delete_blend_state( struct pipe_context *pipe, failover->hw->delete_blend_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } static void @@ -129,7 +130,7 @@ static void * failover_create_depth_stencil_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *templ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_depth_stencil_alpha_state(failover->sw, templ); @@ -161,7 +162,7 @@ failover_delete_depth_stencil_state(struct pipe_context *pipe, failover->hw->delete_depth_stencil_alpha_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } static void @@ -181,7 +182,7 @@ static void * failover_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_fs_state(failover->sw, templ); @@ -212,14 +213,14 @@ failover_delete_fs_state(struct pipe_context *pipe, failover->hw->delete_fs_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } static void * failover_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_vs_state(failover->sw, templ); @@ -252,7 +253,7 @@ failover_delete_vs_state(struct pipe_context *pipe, failover->hw->delete_vs_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } @@ -262,7 +263,7 @@ failover_create_vertex_elements_state( struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *velems ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_vertex_elements_state(failover->sw, count, velems); @@ -295,7 +296,7 @@ failover_delete_vertex_elements_state( struct pipe_context *pipe, failover->hw->delete_vertex_elements_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } static void @@ -315,7 +316,7 @@ static void * failover_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *templ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_rasterizer_state(failover->sw, templ); @@ -348,7 +349,7 @@ failover_delete_rasterizer_state(struct pipe_context *pipe, failover->hw->delete_rasterizer_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } @@ -369,7 +370,7 @@ static void * failover_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *templ) { - struct fo_state *state = malloc(sizeof(struct fo_state)); + struct fo_state *state = MALLOC(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); state->sw_state = failover->sw->create_sampler_state(failover->sw, templ); @@ -443,7 +444,7 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) failover->hw->delete_sampler_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; - free(state); + FREE(state); } @@ -452,7 +453,7 @@ failover_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_sampler_view *templ) { - struct fo_sampler_view *view = malloc(sizeof(struct fo_sampler_view)); + struct fo_sampler_view *view = MALLOC(sizeof(struct fo_sampler_view)); struct failover_context *failover = failover_context(pipe); view->sw = failover->sw->create_sampler_view(failover->sw, texture, templ); @@ -478,7 +479,7 @@ failover_sampler_view_destroy(struct pipe_context *pipe, failover->hw->sampler_view_destroy(failover->hw, fo_view->hw); pipe_resource_reference(&fo_view->base.texture, NULL); - free(fo_view); + FREE(fo_view); } static void diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 323af16b14..a701de33f5 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -168,7 +168,7 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) if (sz == 0) { if (brw->curbe.last_buf) { - free(brw->curbe.last_buf); + FREE(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c index 7d212e5c24..ce5ed0a9ed 100644 --- a/src/gallium/drivers/i965/brw_state_batch.c +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -84,8 +84,8 @@ void brw_clear_batch_cache( struct brw_context *brw ) while (item) { struct brw_cached_batch_item *next = item->next; - free((void *)item->header); - free(item); + FREE((void *)item->header); + FREE(item); item = next; } diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c index 6c47415cac..bd8b9174a8 100644 --- a/src/gallium/drivers/i965/intel_decode.c +++ b/src/gallium/drivers/i965/intel_decode.c @@ -40,6 +40,7 @@ #include <stdint.h> #include <string.h> +#include "util/u_string.h" #include "intel_decode.h" /*#include "intel_chipset.h"*/ @@ -478,7 +479,7 @@ i915_get_instruction_src0(const uint32_t *data, int i, char *srcname) char swizzle[100]; i915_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname); - sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + util_snprintf(swizzle, sizeof(swizzle), ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); if (strcmp(swizzle, ".xyzw") != 0) strcat(srcname, swizzle); } @@ -496,7 +497,7 @@ i915_get_instruction_src1(const uint32_t *data, int i, char *srcname) char swizzle[100]; i915_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname); - sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + util_snprintf(swizzle, sizeof(swizzle), ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); if (strcmp(swizzle, ".xyzw") != 0) strcat(srcname, swizzle); } @@ -513,7 +514,7 @@ i915_get_instruction_src2(const uint32_t *data, int i, char *srcname) char swizzle[100]; i915_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname); - sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + util_snprintf(swizzle, sizeof(swizzle), ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); if (strcmp(swizzle, ".xyzw") != 0) strcat(srcname, swizzle); } @@ -642,7 +643,7 @@ i915_decode_dcl(const uint32_t *data, uint32_t hw_offset, int i, char *instr_pre switch ((d0 >> 19) & 0x3) { case 1: - sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); + util_snprintf(dcl_mask, sizeof(dcl_mask), ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); if (strcmp(dcl_mask, ".") == 0) fprintf(out, "bad (empty) dcl mask\n"); @@ -976,7 +977,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, if (i + 3 >= count) BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_PROGRAM"); - sprintf(instr_prefix, "PS%03d", instr); + util_snprintf(instr_prefix, sizeof(instr_prefix), "PS%03d", instr); i915_decode_instruction(data, hw_offset, i, instr_prefix); i += 3; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 630cdb5e49..0bc8bf2196 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -42,7 +42,7 @@ identity_destroy(struct pipe_context *_pipe) pipe->destroy(pipe); - free(id_pipe); + FREE(id_pipe); } static void @@ -708,7 +708,7 @@ identity_create_sampler_view(struct pipe_context *pipe, struct identity_resource *id_resource = identity_resource(texture); struct pipe_context *pipe_unwrapped = id_pipe->pipe; struct pipe_resource *texture_unwrapped = id_resource->resource; - struct identity_sampler_view *view = malloc(sizeof(struct identity_sampler_view)); + struct identity_sampler_view *view = MALLOC(sizeof(struct identity_sampler_view)); view->sampler_view = pipe_unwrapped->create_sampler_view(pipe_unwrapped, texture_unwrapped, @@ -736,7 +736,7 @@ identity_sampler_view_destroy(struct pipe_context *pipe, view_unwrapped); pipe_resource_reference(&view->texture, NULL); - free(view); + FREE(view); } static struct pipe_transfer * diff --git a/src/gallium/drivers/identity/id_drm.c b/src/gallium/drivers/identity/id_drm.c index d332c36af2..a9d41af18c 100644 --- a/src/gallium/drivers/identity/id_drm.c +++ b/src/gallium/drivers/identity/id_drm.c @@ -68,7 +68,7 @@ identity_drm_destroy(struct drm_api *_api) struct drm_api *api = id_api->api; api->destroy(api); - free(id_api); + FREE(id_api); } struct drm_api * diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 4a3fc036c4..4ea367597e 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -40,7 +40,7 @@ C_SOURCES = \ lp_state_vertex.c \ lp_state_vs.c \ lp_surface.c \ - lp_tex_sample_llvm.c \ + lp_tex_sample.c \ lp_texture.c \ lp_tile_image.c \ lp_tile_soa.c @@ -58,8 +58,9 @@ include ../../Makefile.template lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ - -LIBS += $(GL_LIB_DEPS) -L../../auxiliary/ -lgallium libllvmpipe.a +LDFLAGS += $(LLVM_LDFLAGS) +LIBS += $(GL_LIB_DEPS) -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) +LD=g++ $(PROGS): lp_test_main.o libllvmpipe.a diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index b9e9826e2a..2911cf2179 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -60,7 +60,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', - 'lp_tex_sample_llvm.c', + 'lp_tex_sample.c', 'lp_texture.c', 'lp_tile_image.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index f7cf06d8d4..32b80d3a9f 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -97,63 +97,24 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.winsys = screen->winsys; llvmpipe->pipe.screen = screen; llvmpipe->pipe.priv = priv; - llvmpipe->pipe.destroy = llvmpipe_destroy; - - /* state setters */ - llvmpipe->pipe.create_blend_state = llvmpipe_create_blend_state; - llvmpipe->pipe.bind_blend_state = llvmpipe_bind_blend_state; - llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; - - llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; - llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; - llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states; - llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; - - llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; - llvmpipe->pipe.bind_depth_stencil_alpha_state = llvmpipe_bind_depth_stencil_state; - llvmpipe->pipe.delete_depth_stencil_alpha_state = llvmpipe_delete_depth_stencil_state; - - llvmpipe->pipe.create_rasterizer_state = llvmpipe_create_rasterizer_state; - llvmpipe->pipe.bind_rasterizer_state = llvmpipe_bind_rasterizer_state; - llvmpipe->pipe.delete_rasterizer_state = llvmpipe_delete_rasterizer_state; - - llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state; - llvmpipe->pipe.bind_fs_state = llvmpipe_bind_fs_state; - llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state; - - llvmpipe->pipe.create_vs_state = llvmpipe_create_vs_state; - llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state; - llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state; - llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state; - llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state; - llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; - - llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color; - llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref; - llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state; - llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer; + /* Init the pipe context methods */ + llvmpipe->pipe.destroy = llvmpipe_destroy; llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; - llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; - llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_fragment_sampler_views = llvmpipe_set_fragment_sampler_views; - llvmpipe->pipe.set_vertex_sampler_views = llvmpipe_set_vertex_sampler_views; - llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view; - llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy; - llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; - - llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; - - llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; - llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; - llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; - llvmpipe->pipe.clear = llvmpipe_clear; llvmpipe->pipe.flush = llvmpipe_flush; - + llvmpipe_init_blend_funcs(llvmpipe); + llvmpipe_init_clip_funcs(llvmpipe); + llvmpipe_init_draw_funcs(llvmpipe); + llvmpipe_init_sampler_funcs(llvmpipe); llvmpipe_init_query_funcs( llvmpipe ); + llvmpipe_init_vertex_funcs(llvmpipe); + llvmpipe_init_fs_funcs(llvmpipe); + llvmpipe_init_vs_funcs(llvmpipe); + llvmpipe_init_rasterizer_funcs(llvmpipe); llvmpipe_init_context_resource_funcs( &llvmpipe->pipe ); + llvmpipe_init_surface_functions(llvmpipe); /* * Create drawing context and plug our rendering stage into it. @@ -186,8 +147,6 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe); #endif - lp_init_surface_functions(llvmpipe); - lp_reset_counters(); return &llvmpipe->pipe; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 4848101ffb..4e597b2479 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -94,9 +94,6 @@ struct llvmpipe_context { /** Vertex format */ struct vertex_info vertex_info; - /** Which vertex shader output slot contains point size */ - int psize_slot; - /** The tiling engine */ struct lp_setup_context *setup; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 0b63e1c889..98780d7631 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -42,20 +42,12 @@ -void -llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count); -} - - /** * Draw vertex arrays, with optional indexing. * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ -void +static void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, @@ -115,7 +107,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, } -void +static void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, @@ -128,3 +120,19 @@ llvmpipe_draw_elements(struct pipe_context *pipe, mode, start, count ); } + +static void +llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count); +} + + +void +llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; + llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; + llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; +} diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 3627dbd759..644b821957 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -96,41 +96,40 @@ llvmpipe_flush( struct pipe_context *pipe, /** * Flush context if necessary. * - * TODO: move this logic to an auxiliary library? + * Returns FALSE if it would have block, but do_not_block was set, TRUE + * otherwise. * - * FIXME: We must implement DISCARD/DONTBLOCK/UNSYNCHRONIZED/etc for - * textures to avoid blocking. + * TODO: move this logic to an auxiliary library? */ boolean -llvmpipe_flush_texture(struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned face, - unsigned level, - unsigned flush_flags, - boolean read_only, - boolean cpu_access, - boolean do_not_flush) +llvmpipe_flush_resource(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_block) { unsigned referenced; - referenced = pipe->is_resource_referenced(pipe, texture, face, level); + referenced = pipe->is_resource_referenced(pipe, resource, face, level); if ((referenced & PIPE_REFERENCED_FOR_WRITE) || ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { - if (do_not_flush) - return FALSE; - - /* - * TODO: The semantics of these flush flags are too obtuse. They should - * disappear and the pipe driver should just ensure that all visible - * side-effects happen when they need to happen. - */ - if (referenced & PIPE_REFERENCED_FOR_WRITE) - flush_flags |= PIPE_FLUSH_RENDER_CACHE; + if (resource->target != PIPE_BUFFER) { + /* + * TODO: The semantics of these flush flags are too obtuse. They should + * disappear and the pipe driver should just ensure that all visible + * side-effects happen when they need to happen. + */ + if (referenced & PIPE_REFERENCED_FOR_WRITE) + flush_flags |= PIPE_FLUSH_RENDER_CACHE; - if (referenced & PIPE_REFERENCED_FOR_READ) - flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + if (referenced & PIPE_REFERENCED_FOR_READ) + flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + } if (cpu_access) { /* @@ -139,6 +138,9 @@ llvmpipe_flush_texture(struct pipe_context *pipe, struct pipe_fence_handle *fence = NULL; + if (do_not_block) + return FALSE; + pipe->flush(pipe, flush_flags, &fence); if (fence) { diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index 2375d22b85..7b605681a9 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -33,17 +33,18 @@ struct pipe_context; struct pipe_fence_handle; -void llvmpipe_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence); +void +llvmpipe_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); boolean -llvmpipe_flush_texture(struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned face, - unsigned level, - unsigned flush_flags, - boolean read_only, - boolean cpu_access, - boolean do_not_flush); +llvmpipe_flush_resource(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_block); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 8690941a50..466a2f54fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -171,15 +171,6 @@ lp_jit_screen_cleanup(struct llvmpipe_screen *screen) void lp_jit_screen_init(struct llvmpipe_screen *screen) { - util_cpu_detect(); - -#if 0 - /* For simulating less capable machines */ - util_cpu_caps.has_sse3 = 0; - util_cpu_caps.has_ssse3 = 0; - util_cpu_caps.has_sse4_1 = 0; -#endif - lp_build_init(); screen->module = lp_build_module; diff --git a/src/gallium/drivers/llvmpipe/lp_tile_size.h b/src/gallium/drivers/llvmpipe/lp_limits.h index f0b983c063..4102a9df67 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_size.h +++ b/src/gallium/drivers/llvmpipe/lp_limits.h @@ -25,8 +25,12 @@ * **************************************************************************/ -#ifndef LP_TILE_SIZE_H -#define LP_TILE_SIZE_H +/** + * Implementation limits for LLVMpipe driver. + */ + +#ifndef LP_LIMITS_H +#define LP_LIMITS_H /** @@ -36,4 +40,31 @@ #define TILE_SIZE (1 << TILE_ORDER) -#endif +/** + * Max texture sizes + */ +#define LP_MAX_TEXTURE_2D_LEVELS 13 /* 4K x 4K for now */ +#define LP_MAX_TEXTURE_3D_LEVELS 10 /* 512 x 512 x 512 for now */ + + +/** This must be the larger of LP_MAX_TEXTURE_2D/3D_LEVELS */ +#define LP_MAX_TEXTURE_LEVELS LP_MAX_TEXTURE_2D_LEVELS + + +/** + * Max drawing surface size is the max texture size + */ +#define LP_MAX_HEIGHT (1 << (LP_MAX_TEXTURE_LEVELS - 1)) +#define LP_MAX_WIDTH (1 << (LP_MAX_TEXTURE_LEVELS - 1)) + + +#define LP_MAX_THREADS 8 + + +/** + * Max bytes per scene. This may be replaced by a runtime parameter. + */ +#define LP_MAX_SCENE_SIZE (512 * 1024 * 1024) + + +#endif /* LP_LIMITS_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 4046701b85..a00a592f2f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -28,7 +28,6 @@ #include <limits.h> #include "util/u_memory.h" #include "util/u_math.h" -#include "util/u_cpu_detect.h" #include "util/u_surface.h" #include "lp_scene_queue.h" @@ -122,9 +121,11 @@ lp_rast_end( struct lp_rasterizer *rast ) rast->curr_scene = NULL; +#ifdef DEBUG if (0) - printf("Post render scene: tile read: %d tile write: %d\n", - tile_read_count, tile_write_count); + debug_printf("Post render scene: tile unswizzle: %u tile swizzle: %u\n", + lp_tile_unswizzle_count, lp_tile_swizzle_count); +#endif } @@ -869,20 +870,6 @@ create_rast_threads(struct lp_rasterizer *rast) { unsigned i; -#ifdef PIPE_OS_WINDOWS - /* Multithreading not supported on windows until conditions and barriers are - * properly implemented. */ - rast->num_threads = 0; -#else -#ifdef PIPE_OS_EMBEDDED - rast->num_threads = 0; -#else - rast->num_threads = util_cpu_caps.nr_cpus; -#endif - rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads); - rast->num_threads = MIN2(rast->num_threads, MAX_THREADS); -#endif - /* NOTE: if num_threads is zero, we won't use any threads */ for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_init(&rast->tasks[i].work_ready, 0); @@ -895,12 +882,12 @@ create_rast_threads(struct lp_rasterizer *rast) /** - * Create new lp_rasterizer. - * \param empty the queue to put empty scenes on after we've finished - * processing them. + * Create new lp_rasterizer. If num_threads is zero, don't create any + * new threads, do rendering synchronously. + * \param num_threads number of rasterizer threads to create */ struct lp_rasterizer * -lp_rast_create( void ) +lp_rast_create( unsigned num_threads ) { struct lp_rasterizer *rast; unsigned i; @@ -917,6 +904,8 @@ lp_rast_create( void ) task->thread_index = i; } + rast->num_threads = num_threads; + create_rast_threads(rast); /* for synchronizing rasterization threads */ @@ -955,6 +944,8 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) /* for synchronizing rasterization threads */ pipe_barrier_destroy( &rast->barrier ); + lp_scene_queue_destroy(rast->full_scenes); + FREE(rast); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index a0ecb2fc47..e2f6f92677 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -134,7 +134,7 @@ struct lp_rast_triangle { struct lp_rasterizer * -lp_rast_create( void ); +lp_rast_create( unsigned num_threads ); void lp_rast_destroy( struct lp_rasterizer * ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 8bf2b92a6a..5884d12721 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -35,9 +35,7 @@ #include "lp_scene.h" #include "lp_texture.h" #include "lp_tile_soa.h" - - -#define MAX_THREADS 8 /* XXX probably temporary here */ +#include "lp_limits.h" struct lp_rasterizer; @@ -107,16 +105,16 @@ struct lp_rasterizer * (potentially) shared, these empty scenes should be returned to * the context which created them rather than retained here. */ - struct lp_scene_queue *empty_scenes; + /* struct lp_scene_queue *empty_scenes; */ /** The scene currently being rasterized by the threads */ struct lp_scene *curr_scene; /** A task object for each rasterization thread */ - struct lp_rasterizer_task tasks[MAX_THREADS]; + struct lp_rasterizer_task tasks[LP_MAX_THREADS]; unsigned num_threads; - pipe_thread threads[MAX_THREADS]; + pipe_thread threads[LP_MAX_THREADS]; /** For synchronizing the rasterization threads */ pipe_barrier barrier; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 182e7cb230..1482a777ff 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -32,9 +32,20 @@ #include "util/u_surface.h" #include "lp_scene.h" #include "lp_scene_queue.h" -#include "lp_debug.h" +/** List of texture references */ +struct texture_ref { + struct pipe_resource *texture; + struct texture_ref *prev, *next; /**< linked list w/ u_simple_list.h */ +}; + + + +/** + * Create a new scene object. + * \param queue the queue to put newly rendered/emptied scenes into + */ struct lp_scene * lp_scene_create( struct pipe_context *pipe, struct lp_scene_queue *queue ) @@ -57,7 +68,7 @@ lp_scene_create( struct pipe_context *pipe, scene->data.head = scene->data.tail = CALLOC_STRUCT(data_block); - make_empty_list(&scene->textures); + make_empty_list(&scene->resources); pipe_mutex_init(scene->mutex); @@ -66,7 +77,7 @@ lp_scene_create( struct pipe_context *pipe, /** - * Free all data associated with the given scene, and free(scene). + * Free all data associated with the given scene, and the scene itself. */ void lp_scene_destroy(struct lp_scene *scene) @@ -178,15 +189,17 @@ lp_scene_reset(struct lp_scene *scene ) /* Release texture refs */ { - struct texture_ref *ref, *next, *ref_list = &scene->textures; + struct resource_ref *ref, *next, *ref_list = &scene->resources; for (ref = ref_list->next; ref != ref_list; ref = next) { next = next_elem(ref); - pipe_resource_reference(&ref->texture, NULL); + pipe_resource_reference(&ref->resource, NULL); FREE(ref); } make_empty_list(ref_list); } + scene->scene_size = 0; + scene->has_color_clear = FALSE; scene->has_depth_clear = FALSE; } @@ -218,7 +231,10 @@ lp_bin_new_data_block( struct data_block_list *list ) } -/** Return number of bytes used for all bin data within a scene */ +/** + * Return number of bytes used for all bin data within a scene. + * This does not include resources (textures) referenced by the scene. + */ unsigned lp_scene_data_size( const struct lp_scene *scene ) { @@ -247,32 +263,34 @@ lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) /** - * Add a reference to a texture by the scene. + * Add a reference to a resource by the scene. */ void -lp_scene_texture_reference( struct lp_scene *scene, - struct pipe_resource *texture ) +lp_scene_add_resource_reference(struct lp_scene *scene, + struct pipe_resource *resource) { - struct texture_ref *ref = CALLOC_STRUCT(texture_ref); + struct resource_ref *ref = CALLOC_STRUCT(resource_ref); if (ref) { - struct texture_ref *ref_list = &scene->textures; - pipe_resource_reference(&ref->texture, texture); + struct resource_ref *ref_list = &scene->resources; + pipe_resource_reference(&ref->resource, resource); insert_at_tail(ref_list, ref); } + + scene->scene_size += llvmpipe_resource_size(resource); } /** - * Does this scene have a reference to the given texture? + * Does this scene have a reference to the given resource? */ boolean -lp_scene_is_resource_referenced( const struct lp_scene *scene, - const struct pipe_resource *texture ) +lp_scene_is_resource_referenced(const struct lp_scene *scene, + const struct pipe_resource *resource) { - const struct texture_ref *ref_list = &scene->textures; - const struct texture_ref *ref; + const struct resource_ref *ref_list = &scene->resources; + const struct resource_ref *ref; foreach (ref, ref_list) { - if (ref->texture == texture) + if (ref->resource == resource) return TRUE; } return FALSE; @@ -393,61 +411,6 @@ end: } - -/** - * Prepare this scene for the rasterizer. - * Map the framebuffer surfaces. Initialize the 'rast' state. - */ -static boolean -lp_scene_map_buffers( struct lp_scene *scene ) -{ - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - - /* XXX framebuffer surfaces are no longer mapped here */ - /* XXX move all map/unmap stuff into rast module... */ - - return TRUE; -} - - - -/** - * Called after rasterizer as finished rasterizing a scene. - * - * We want to call this from the pipe_context's current thread to - * avoid having to have mutexes on the transfer functions. - */ -static void -lp_scene_unmap_buffers( struct lp_scene *scene ) -{ -#if 0 - unsigned i; - - for (i = 0; i < scene->fb.nr_cbufs; i++) { - if (scene->cbuf_map[i]) { - struct pipe_surface *cbuf = scene->fb.cbufs[i]; - llvmpipe_resource_unmap(cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice); - scene->cbuf_map[i] = NULL; - } - } - - if (scene->zsbuf_map) { - struct pipe_surface *zsbuf = scene->fb.zsbuf; - llvmpipe_resource_unmap(zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice); - scene->zsbuf_map = NULL; - } -#endif - - util_unreference_framebuffer_state( &scene->fb ); -} - - void lp_scene_begin_binning( struct lp_scene *scene, struct pipe_framebuffer_state *fb ) { @@ -464,8 +427,7 @@ void lp_scene_begin_binning( struct lp_scene *scene, void lp_scene_rasterize( struct lp_scene *scene, - struct lp_rasterizer *rast, - boolean write_depth ) + struct lp_rasterizer *rast ) { if (0) { unsigned x, y; @@ -479,11 +441,6 @@ void lp_scene_rasterize( struct lp_scene *scene, } } - scene->write_depth = (scene->fb.zsbuf != NULL && - write_depth); - - lp_scene_map_buffers( scene ); - /* Enqueue the scene for rasterization, then immediately wait for * it to finish. */ @@ -494,6 +451,9 @@ void lp_scene_rasterize( struct lp_scene *scene, * transfers become per-context: */ lp_rast_finish( rast ); - lp_scene_unmap_buffers( scene ); + + util_unreference_framebuffer_state( &scene->fb ); + + /* put scene into the empty list */ lp_scene_enqueue( scene->empty_queue, scene ); } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index ac0717db6a..9467cd6f16 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -44,10 +44,8 @@ struct lp_scene_queue; /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. */ -#define MAXHEIGHT 2048 -#define MAXWIDTH 2048 -#define TILES_X (MAXWIDTH / TILE_SIZE) -#define TILES_Y (MAXHEIGHT / TILE_SIZE) +#define TILES_X (LP_MAX_WIDTH / TILE_SIZE) +#define TILES_Y (LP_MAX_HEIGHT / TILE_SIZE) #define CMD_BLOCK_MAX 128 @@ -97,10 +95,10 @@ struct data_block_list { }; -/** List of texture references */ -struct texture_ref { - struct pipe_resource *texture; - struct texture_ref *prev, *next; /**< linked list w/ u_simple_list.h */ +/** List of resource references */ +struct resource_ref { + struct pipe_resource *resource; + struct resource_ref *prev, *next; /**< linked list w/ u_simple_list.h */ }; @@ -118,10 +116,14 @@ struct lp_scene { /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; - /** list of textures referenced by the scene commands */ - struct texture_ref textures; + /** list of resources referenced by the scene commands */ + struct resource_ref resources; + + /** Approx memory used by the scene (in bytes). This includes the + * shared and per-tile bins plus any referenced resources/textures. + */ + unsigned scene_size; - boolean write_depth; boolean has_color_clear; boolean has_depth_clear; @@ -164,11 +166,11 @@ unsigned lp_scene_data_size( const struct lp_scene *scene ); unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ); -void lp_scene_texture_reference( struct lp_scene *scene, - struct pipe_resource *texture ); +void lp_scene_add_resource_reference(struct lp_scene *scene, + struct pipe_resource *resource); -boolean lp_scene_is_resource_referenced( const struct lp_scene *scene, - const struct pipe_resource *texture ); +boolean lp_scene_is_resource_referenced(const struct lp_scene *scene, + const struct pipe_resource *resource ); /** @@ -184,6 +186,8 @@ lp_scene_alloc( struct lp_scene *scene, unsigned size) lp_bin_new_data_block( list ); } + scene->scene_size += size; + { struct data_block *tail = list->tail; ubyte *data = tail->data + tail->used; @@ -206,6 +210,8 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size, lp_bin_new_data_block( list ); } + scene->scene_size += size; + { struct data_block *tail = list->tail; ubyte *data = tail->data + tail->used; @@ -222,6 +228,7 @@ static INLINE void lp_scene_putback_data( struct lp_scene *scene, unsigned size) { struct data_block_list *list = &scene->data; + scene->scene_size -= size; assert(list->tail->used >= size); list->tail->used -= size; } @@ -304,11 +311,18 @@ lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); void lp_scene_rasterize( struct lp_scene *scene, - struct lp_rasterizer *rast, - boolean write_depth ); + struct lp_rasterizer *rast ); void lp_scene_begin_binning( struct lp_scene *scene, struct pipe_framebuffer_state *fb ); + +static INLINE unsigned +lp_scene_get_size(const struct lp_scene *scene) +{ + return scene->scene_size; +} + + #endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 6d309c6b64..111eedc4f2 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,6 +27,8 @@ #include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "util/u_format.h" #include "util/u_format_s3tc.h" #include "pipe/p_defines.h" @@ -39,6 +41,7 @@ #include "lp_context.h" #include "lp_debug.h" #include "lp_public.h" +#include "lp_limits.h" #include "state_tracker/sw_winsys.h" @@ -94,6 +97,8 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; + case PIPE_CAP_SM3: + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: @@ -165,7 +170,7 @@ static boolean llvmpipe_is_format_supported( struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, - unsigned tex_usage, + unsigned bind, unsigned geom_flags ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); @@ -173,7 +178,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, const struct util_format_description *format_desc; format_desc = util_format_description(format); - if(!format_desc) + if (!format_desc) return FALSE; assert(target == PIPE_TEXTURE_1D || @@ -181,45 +186,42 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); - switch(format) { - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return util_format_s3tc_enabled; - default: - break; - } - - if(tex_usage & PIPE_BIND_RENDER_TARGET) { - if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) + if (bind & PIPE_BIND_RENDER_TARGET) { + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) return FALSE; - if(format_desc->block.width != 1 || - format_desc->block.height != 1) + if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return FALSE; - if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && - format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) + if (format_desc->block.width != 1 || + format_desc->block.height != 1) return FALSE; } - if(tex_usage & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - if(!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) + if (bind & PIPE_BIND_DISPLAY_TARGET) { + if(!winsys->is_displaytarget_format_supported(winsys, bind, format)) return FALSE; } - if(tex_usage & PIPE_BIND_DEPTH_STENCIL) { - if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + if (bind & PIPE_BIND_DEPTH_STENCIL) { + if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) + return FALSE; + + if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) return FALSE; /* FIXME: Temporary restriction. See lp_state_fs.c. */ - if(format_desc->block.bits != 32) + if (format_desc->block.bits != 32) return FALSE; } + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + return util_format_s3tc_enabled; + } + + /* + * Everything else should be supported by u_format. + */ return TRUE; } @@ -286,12 +288,26 @@ llvmpipe_create_screen(struct sw_winsys *winsys) screen->base.context_create = llvmpipe_create_context; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; - util_format_s3tc_init(); - llvmpipe_init_screen_resource_funcs(&screen->base); llvmpipe_init_screen_fence_funcs(&screen->base); lp_jit_screen_init(screen); +#ifdef PIPE_OS_WINDOWS + /* Multithreading not supported on windows until conditions and barriers are + * properly implemented. */ + screen->num_threads = 0; +#else +#ifdef PIPE_OS_EMBEDDED + screen->num_threads = 0; +#else + screen->num_threads = util_cpu_caps.nr_cpus; +#endif + screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); + screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS); +#endif + + util_format_s3tc_init(); + return &screen->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h index af25e043cc..4f39432610 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_screen.h @@ -58,6 +58,8 @@ struct llvmpipe_screen LLVMTypeRef context_ptr_type; + unsigned num_threads; + /* Increments whenever textures are modified. Contexts can track * this. */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6be13c60a5..2150956008 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -74,6 +74,26 @@ lp_setup_get_current_scene(struct lp_setup_context *setup) } +/** + * Check if the size of the current scene has exceeded the limit. + * If so, flush/render it. + */ +static void +setup_check_scene_size_and_flush(struct lp_setup_context *setup) +{ + if (setup->scene) { + struct lp_scene *scene = lp_setup_get_current_scene(setup); + unsigned size = lp_scene_get_size(scene); + + if (size > LP_MAX_SCENE_SIZE) { + /*printf("LLVMPIPE: scene size = %u, flushing.\n", size);*/ + set_scene_state( setup, SETUP_FLUSHED ); + /*assert(lp_scene_get_size(scene) == 0);*/ + } + } +} + + static void first_triangle( struct lp_setup_context *setup, const float (*v0)[4], @@ -132,14 +152,11 @@ static void reset_context( struct lp_setup_context *setup ) /** Rasterize all scene's bins */ static void -lp_setup_rasterize_scene( struct lp_setup_context *setup, - boolean write_depth ) +lp_setup_rasterize_scene( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); - lp_scene_rasterize(scene, - setup->rast, - write_depth); + lp_scene_rasterize(scene, setup->rast); reset_context( setup ); @@ -190,7 +207,7 @@ execute_clears( struct lp_setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); begin_binning( setup ); - lp_setup_rasterize_scene( setup, TRUE ); + lp_setup_rasterize_scene( setup ); } @@ -221,7 +238,7 @@ set_scene_state( struct lp_setup_context *setup, if (old_state == SETUP_CLEARED) execute_clears( setup ); else - lp_setup_rasterize_scene( setup, TRUE ); + lp_setup_rasterize_scene( setup ); break; default: @@ -243,7 +260,7 @@ lp_setup_flush( struct lp_setup_context *setup, if (setup->scene) { struct lp_scene *scene = lp_setup_get_current_scene(setup); - union lp_rast_cmd_arg dummy; + union lp_rast_cmd_arg dummy = {0}; if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { @@ -343,20 +360,25 @@ lp_setup_clear( struct lp_setup_context *setup, struct pipe_fence_handle * lp_setup_fence( struct lp_setup_context *setup ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ - struct lp_fence *fence = lp_fence_create(rank); + if (setup->num_threads == 0) { + return NULL; + } + else { + struct lp_scene *scene = lp_setup_get_current_scene(setup); + const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ + struct lp_fence *fence = lp_fence_create(rank); - LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); + LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); - set_scene_state( setup, SETUP_ACTIVE ); + set_scene_state( setup, SETUP_ACTIVE ); - /* insert the fence into all command bins */ - lp_scene_bin_everywhere( scene, - lp_rast_fence, - lp_rast_arg_fence(fence) ); + /* insert the fence into all command bins */ + lp_scene_bin_everywhere( scene, + lp_rast_fence, + lp_rast_arg_fence(fence) ); - return (struct pipe_fence_handle *) fence; + return (struct pipe_fence_handle *) fence; + } } @@ -591,10 +613,14 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup, void lp_setup_update_state( struct lp_setup_context *setup ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_scene *scene; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + setup_check_scene_size_and_flush(setup); + + scene = lp_setup_get_current_scene(setup); + assert(setup->fs.current.jit_function); /* Some of the 'draw' pipeline stages may have changed some driver state. @@ -715,7 +741,7 @@ lp_setup_update_state( struct lp_setup_context *setup ) */ for (i = 0; i < Elements(setup->fs.current_tex); i++) { if (setup->fs.current_tex[i]) - lp_scene_texture_reference(scene, setup->fs.current_tex[i]); + lp_scene_add_resource_reference(scene, setup->fs.current_tex[i]); } } } @@ -736,6 +762,8 @@ lp_setup_destroy( struct lp_setup_context *setup ) reset_context( setup ); + util_unreference_framebuffer_state(&setup->fb); + for (i = 0; i < Elements(setup->fs.current_tex); i++) { pipe_resource_reference(&setup->fs.current_tex[i], NULL); } @@ -750,6 +778,8 @@ lp_setup_destroy( struct lp_setup_context *setup ) lp_scene_destroy(scene); } + lp_scene_queue_destroy(setup->empty_scenes); + lp_rast_destroy( setup->rast ); FREE( setup ); @@ -765,8 +795,9 @@ struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ) { - unsigned i; + struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); struct lp_setup_context *setup = CALLOC_STRUCT(lp_setup_context); + unsigned i; if (!setup) return NULL; @@ -779,7 +810,8 @@ lp_setup_create( struct pipe_context *pipe, /* XXX: move this to the screen and share between contexts: */ - setup->rast = lp_rast_create(); + setup->num_threads = screen->num_threads; + setup->rast = lp_rast_create(screen->num_threads); if (!setup->rast) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 4594f7597d..584764ce8a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -80,6 +80,7 @@ struct lp_setup_context * create/install this itself now. */ struct draw_stage *vbuf; + unsigned num_threads; struct lp_rasterizer *rast; struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */ struct lp_scene *scene; /**< current scene being built */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index a401275478..5d3122e8ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -440,7 +440,12 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) static void lp_setup_vbuf_destroy(struct vbuf_render *vbr) { - lp_setup_destroy(lp_setup_context(vbr)); + struct lp_setup_context *setup = lp_setup_context(vbr); + if (setup->vertex_buffer) { + align_free(setup->vertex_buffer); + setup->vertex_buffer = NULL; + } + lp_setup_destroy(setup); } diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index dcbff190b6..18143807c9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -31,11 +31,10 @@ #ifndef LP_STATE_H #define LP_STATE_H -#include "gallivm/lp_bld.h" - #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" +#include "gallivm/lp_bld.h" #include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ @@ -85,8 +84,6 @@ struct lp_fragment_shader_variant_key struct lp_fragment_shader_variant { - struct lp_fragment_shader *shader; - struct lp_fragment_shader_variant_key key; LLVMValueRef function[2]; @@ -97,11 +94,7 @@ struct lp_fragment_shader_variant }; -/** - * Subclass of pipe_shader_state (though it doesn't really need to be). - * - * This is starting to look an awful lot like a quad pipeline stage... - */ +/** Subclass of pipe_shader_state */ struct lp_fragment_shader { struct pipe_shader_state base; @@ -109,140 +102,58 @@ struct lp_fragment_shader struct tgsi_shader_info info; struct lp_fragment_shader_variant *variants; - - struct lp_fragment_shader_variant *current; }; /** Subclass of pipe_shader_state */ -struct lp_vertex_shader { +struct lp_vertex_shader +{ struct pipe_shader_state shader; struct draw_vertex_shader *draw_data; }; -struct lp_velems_state { + +/** Vertex element state */ +struct lp_velems_state +{ unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; }; -void * -llvmpipe_create_blend_state(struct pipe_context *, - const struct pipe_blend_state *); -void llvmpipe_bind_blend_state(struct pipe_context *, - void *); -void llvmpipe_delete_blend_state(struct pipe_context *, - void *); - -void * -llvmpipe_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); -void llvmpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); void -llvmpipe_bind_vertex_sampler_states(struct pipe_context *, - unsigned num_samplers, - void **samplers); -void llvmpipe_delete_sampler_state(struct pipe_context *, void *); - -void * -llvmpipe_create_depth_stencil_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); -void llvmpipe_bind_depth_stencil_state(struct pipe_context *, void *); -void llvmpipe_delete_depth_stencil_state(struct pipe_context *, void *); - -void * -llvmpipe_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void llvmpipe_bind_rasterizer_state(struct pipe_context *, void *); -void llvmpipe_delete_rasterizer_state(struct pipe_context *, void *); - -void llvmpipe_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - -void llvmpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - -void llvmpipe_set_stencil_ref( struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref ); - -void llvmpipe_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void llvmpipe_set_constant_buffer(struct pipe_context *, - uint shader, uint index, - struct pipe_resource *buf); - -void *llvmpipe_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void llvmpipe_bind_fs_state(struct pipe_context *, void *); -void llvmpipe_delete_fs_state(struct pipe_context *, void *); -void *llvmpipe_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void llvmpipe_bind_vs_state(struct pipe_context *, void *); -void llvmpipe_delete_vs_state(struct pipe_context *, void *); - -void *llvmpipe_create_vertex_elements_state(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); -void llvmpipe_bind_vertex_elements_state(struct pipe_context *, void *); -void llvmpipe_delete_vertex_elements_state(struct pipe_context *, void *); - -void llvmpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - -void llvmpipe_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); - -void llvmpipe_set_fragment_sampler_views(struct pipe_context *, - unsigned num, - struct pipe_sampler_view **); +llvmpipe_set_framebuffer_state(struct pipe_context *, + const struct pipe_framebuffer_state *); void -llvmpipe_set_vertex_sampler_views(struct pipe_context *, - unsigned num, - struct pipe_sampler_view **); - -struct pipe_sampler_view * -llvmpipe_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ); +llvmpipe_update_fs(struct llvmpipe_context *lp); void -llvmpipe_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view); +llvmpipe_update_derived(struct llvmpipe_context *llvmpipe); -void llvmpipe_set_viewport_state( struct pipe_context *, - const struct pipe_viewport_state * ); - -void llvmpipe_set_vertex_buffers(struct pipe_context *, - unsigned count, - const struct pipe_vertex_buffer *); +void +llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_update_fs(struct llvmpipe_context *lp); +void +llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ); +void +llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe); +void +llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); +void +llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count); void -llvmpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count); +llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe); void -llvmpipe_map_texture_surfaces(struct llvmpipe_context *lp); +llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe); void -llvmpipe_unmap_texture_surfaces(struct llvmpipe_context *lp); +llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index 4ee28473e8..8569507f4e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -40,15 +40,16 @@ #include "lp_state.h" -void * +static void * llvmpipe_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { return mem_dup(blend, sizeof(*blend)); } -void llvmpipe_bind_blend_state( struct pipe_context *pipe, - void *blend ) + +static void +llvmpipe_bind_blend_state(struct pipe_context *pipe, void *blend) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -62,15 +63,17 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe, llvmpipe->dirty |= LP_NEW_BLEND; } -void llvmpipe_delete_blend_state(struct pipe_context *pipe, - void *blend) + +static void +llvmpipe_delete_blend_state(struct pipe_context *pipe, void *blend) { FREE( blend ); } -void llvmpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) +static void +llvmpipe_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -93,14 +96,15 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, */ -void * +static void * llvmpipe_create_depth_stencil_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *depth_stencil) { return mem_dup(depth_stencil, sizeof(*depth_stencil)); } -void + +static void llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) { @@ -116,14 +120,17 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA; } -void + +static void llvmpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) { FREE( depth ); } -void llvmpipe_set_stencil_ref( struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref ) + +static void +llvmpipe_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *stencil_ref) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -142,3 +149,18 @@ void llvmpipe_set_stencil_ref( struct pipe_context *pipe, } +void +llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_blend_state = llvmpipe_create_blend_state; + llvmpipe->pipe.bind_blend_state = llvmpipe_bind_blend_state; + llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; + + llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; + llvmpipe->pipe.bind_depth_stencil_alpha_state = llvmpipe_bind_depth_stencil_state; + llvmpipe->pipe.delete_depth_stencil_alpha_state = llvmpipe_delete_depth_stencil_state; + + llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color; + + llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_clip.c b/src/gallium/drivers/llvmpipe/lp_state_clip.c index df68f27acc..32ae079cc1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_clip.c +++ b/src/gallium/drivers/llvmpipe/lp_state_clip.c @@ -32,8 +32,9 @@ #include "draw/draw_context.h" -void llvmpipe_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) +static void +llvmpipe_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -42,8 +43,9 @@ void llvmpipe_set_clip_state( struct pipe_context *pipe, } -void llvmpipe_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) +static void +llvmpipe_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *viewport) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -55,8 +57,9 @@ void llvmpipe_set_viewport_state( struct pipe_context *pipe, } -void llvmpipe_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) +static void +llvmpipe_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *scissor) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -67,8 +70,9 @@ void llvmpipe_set_scissor_state( struct pipe_context *pipe, } -void llvmpipe_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) +static void +llvmpipe_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -77,3 +81,14 @@ void llvmpipe_set_polygon_stipple( struct pipe_context *pipe, llvmpipe->poly_stipple = *stipple; /* struct copy */ llvmpipe->dirty |= LP_NEW_STIPPLE; } + + + +void +llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state; + llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; + llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; + llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 59d5a440c0..513e62e39e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -403,9 +403,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef step2_ptr) { const struct tgsi_token *tokens = shader->base.tokens; - LLVMTypeRef elem_type; LLVMTypeRef vec_type; - LLVMTypeRef int_vec_type; LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z = interp->pos[2]; @@ -422,9 +420,7 @@ generate_fs(struct llvmpipe_context *lp, stencil_refs[0] = lp_jit_context_stencil_ref_front_value(builder, context_ptr); stencil_refs[1] = lp_jit_context_stencil_ref_back_value(builder, context_ptr); - elem_type = lp_build_elem_type(type); vec_type = lp_build_vec_type(type); - int_vec_type = lp_build_int_vec_type(type); consts_ptr = lp_jit_context_constants(builder, context_ptr); @@ -474,7 +470,7 @@ generate_fs(struct llvmpipe_context *lp, lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, sampler); + outputs, sampler, &shader->info); for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -546,7 +542,6 @@ generate_blend(const struct pipe_blend_state *blend, struct lp_build_flow_context *flow; struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; - LLVMTypeRef int_vec_type; LLVMValueRef const_ptr; LLVMValueRef con[4]; LLVMValueRef dst[4]; @@ -561,7 +556,6 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_mask_begin(&mask_ctx, flow, type, mask); vec_type = lp_build_vec_type(type); - int_vec_type = lp_build_int_vec_type(type); const_ptr = lp_jit_context_blend_color(builder, context_ptr); const_ptr = LLVMBuildBitCast(builder, const_ptr, @@ -624,10 +618,8 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; - LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; - LLVMTypeRef blend_int_vec_type; LLVMTypeRef arg_types[15]; LLVMTypeRef func_type; LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); @@ -682,11 +674,9 @@ generate_fragment(struct llvmpipe_context *lp, */ fs_elem_type = lp_build_elem_type(fs_type); - fs_vec_type = lp_build_vec_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); - blend_int_vec_type = lp_build_int_vec_type(blend_type); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ @@ -945,7 +935,6 @@ generate_variant(struct llvmpipe_context *lp, if(!variant) return NULL; - variant->shader = shader; memcpy(&variant->key, key, sizeof *key); generate_fragment(lp, shader, variant, 0); @@ -959,7 +948,7 @@ generate_variant(struct llvmpipe_context *lp, } -void * +static void * llvmpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -975,11 +964,16 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); + if (LP_DEBUG & DEBUG_TGSI) { + debug_printf("llvmpipe: Create fragment shader %p:\n", (void *) shader); + tgsi_dump(templ->tokens, 0); + } + return shader; } -void +static void llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -995,7 +989,7 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) } -void +static void llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -1038,7 +1032,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) -void +static void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct pipe_resource *constants) @@ -1112,8 +1106,8 @@ make_variant_key(struct llvmpipe_context *lp, unsigned chan; format_desc = util_format_description(lp->framebuffer.cbufs[i]->format); - assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || - format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); key->blend.rt[i].colormask = lp->blend->rt[i].colormask; @@ -1169,8 +1163,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */ } - shader->current = variant; - /* TODO: put this in the variant */ /* TODO: most of these can be relaxed, in particular the colormask */ opaque = !key.blend.logicop_enable && @@ -1184,7 +1176,19 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) ? TRUE : FALSE; lp_setup_set_fs_functions(lp->setup, - shader->current->jit_function[RAST_WHOLE], - shader->current->jit_function[RAST_EDGE_TEST], + variant->jit_function[RAST_WHOLE], + variant->jit_function[RAST_EDGE_TEST], opaque); } + + + +void +llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state; + llvmpipe->pipe.bind_fs_state = llvmpipe_bind_fs_state; + llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state; + + llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 47f65fe72d..622eb47ff4 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -34,7 +34,7 @@ -void * +static void * llvmpipe_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *rast) { @@ -46,7 +46,7 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, -void +static void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -71,16 +71,27 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW, llvmpipe->rasterizer->scissor, llvmpipe->rasterizer->gl_rasterization_rules); + lp_setup_set_flatshade_first( llvmpipe->setup, + llvmpipe->rasterizer->flatshade_first); } llvmpipe->dirty |= LP_NEW_RASTERIZER; } -void llvmpipe_delete_rasterizer_state(struct pipe_context *pipe, - void *rasterizer) +static void +llvmpipe_delete_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) { FREE( rasterizer ); } + +void +llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_rasterizer_state = llvmpipe_create_rasterizer_state; + llvmpipe->pipe.bind_rasterizer_state = llvmpipe_bind_rasterizer_state; + llvmpipe->pipe.delete_rasterizer_state = llvmpipe_delete_rasterizer_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 3552ff50ce..55d43368a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -41,7 +41,7 @@ -void * +static void * llvmpipe_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) { @@ -49,7 +49,7 @@ llvmpipe_create_sampler_state(struct pipe_context *pipe, } -void +static void llvmpipe_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { @@ -76,7 +76,7 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, } -void +static void llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, unsigned num_samplers, void **samplers) @@ -104,7 +104,7 @@ llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, } -void +static void llvmpipe_set_fragment_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -133,7 +133,7 @@ llvmpipe_set_fragment_sampler_views(struct pipe_context *pipe, } -void +static void llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -163,7 +163,7 @@ llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe, } -struct pipe_sampler_view * +static struct pipe_sampler_view * llvmpipe_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_sampler_view *templ) @@ -182,7 +182,7 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe, } -void +static void llvmpipe_sampler_view_destroy(struct pipe_context *pipe, struct pipe_sampler_view *view) { @@ -191,7 +191,7 @@ llvmpipe_sampler_view_destroy(struct pipe_context *pipe, } -void +static void llvmpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { @@ -199,4 +199,16 @@ llvmpipe_delete_sampler_state(struct pipe_context *pipe, } - +void +llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; + + llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states; + llvmpipe->pipe.set_fragment_sampler_views = llvmpipe_set_fragment_sampler_views; + llvmpipe->pipe.set_vertex_sampler_views = llvmpipe_set_vertex_sampler_views; + llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view; + llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy; + llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 7d86c5750c..63b8f27b39 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -52,8 +52,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb); - assert(fb->width <= MAXWIDTH); - assert(fb->height <= MAXHEIGHT); + assert(fb->width <= LP_MAX_WIDTH); + assert(fb->height <= LP_MAX_HEIGHT); if (changed) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index f6427aa908..113f13db01 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -35,7 +35,7 @@ #include "draw/draw_context.h" -void * +static void * llvmpipe_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *attribs) @@ -50,7 +50,7 @@ llvmpipe_create_vertex_elements_state(struct pipe_context *pipe, return velems; } -void +static void llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe, void *velems) { @@ -65,13 +65,13 @@ llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe, draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem); } -void +static void llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) { FREE( velems ); } -void +static void llvmpipe_set_vertex_buffers(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_buffer *buffers) @@ -87,3 +87,15 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(llvmpipe->draw, count, buffers); } + + + +void +llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state; + llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state; + llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; + + llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 884e3878e6..f2d8808990 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -28,15 +28,17 @@ #include "pipe/p_defines.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "util/u_memory.h" #include "draw/draw_context.h" #include "lp_context.h" +#include "lp_debug.h" #include "lp_state.h" -void * +static void * llvmpipe_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -57,6 +59,11 @@ llvmpipe_create_vs_state(struct pipe_context *pipe, if (state->draw_data == NULL) goto fail; + if (LP_DEBUG & DEBUG_TGSI) { + debug_printf("llvmpipe: Create vertex shader %p:\n", (void *) state); + tgsi_dump(templ->tokens, 0); + } + return state; fail: @@ -69,7 +76,7 @@ fail: } -void +static void llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -87,7 +94,7 @@ llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) } -void +static void llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -99,3 +106,13 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) FREE( (void *)state->shader.tokens ); FREE( state ); } + + + +void +llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_vs_state = llvmpipe_create_vs_state; + llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state; + llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 1a116989d4..8bd83f576f 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -28,9 +28,9 @@ #include "util/u_rect.h" #include "lp_context.h" #include "lp_flush.h" +#include "lp_limits.h" #include "lp_surface.h" #include "lp_texture.h" -#include "lp_tile_size.h" /** @@ -59,19 +59,19 @@ lp_surface_copy(struct pipe_context *pipe, struct llvmpipe_resource *dst_tex = llvmpipe_resource(dst->texture); const enum pipe_format format = src_tex->base.format; - llvmpipe_flush_texture(pipe, - dst->texture, dst->face, dst->level, - 0, /* flush_flags */ - FALSE, /* read_only */ - FALSE, /* cpu_access */ - FALSE); /* do_not_flush */ + llvmpipe_flush_resource(pipe, + dst->texture, dst->face, dst->level, + 0, /* flush_flags */ + FALSE, /* read_only */ + FALSE, /* cpu_access */ + FALSE); /* do_not_block */ - llvmpipe_flush_texture(pipe, - src->texture, src->face, src->level, - 0, /* flush_flags */ - TRUE, /* read_only */ - FALSE, /* cpu_access */ - FALSE); /* do_not_flush */ + llvmpipe_flush_resource(pipe, + src->texture, src->face, src->level, + 0, /* flush_flags */ + TRUE, /* read_only */ + FALSE, /* cpu_access */ + FALSE); /* do_not_block */ /* printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n", @@ -146,7 +146,7 @@ lp_surface_copy(struct pipe_context *pipe, void -lp_init_surface_functions(struct llvmpipe_context *lp) +llvmpipe_init_surface_functions(struct llvmpipe_context *lp) { lp->pipe.surface_copy = lp_surface_copy; lp->pipe.surface_fill = util_surface_fill; diff --git a/src/gallium/drivers/llvmpipe/lp_surface.h b/src/gallium/drivers/llvmpipe/lp_surface.h index 4d78a53c4f..b1b896ebd9 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.h +++ b/src/gallium/drivers/llvmpipe/lp_surface.h @@ -36,7 +36,7 @@ struct llvmpipe_context; extern void -lp_init_surface_functions(struct llvmpipe_context *lp); +llvmpipe_init_surface_functions(struct llvmpipe_context *lp); #endif /* LP_SURFACE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 818f7a9a56..fae7bf3fcf 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -154,7 +154,6 @@ add_blend_test(LLVMModuleRef module, enum vector_mode mode, struct lp_type type) { - LLVMTypeRef ret_type; LLVMTypeRef vec_type; LLVMTypeRef args[4]; LLVMValueRef func; @@ -165,7 +164,6 @@ add_blend_test(LLVMModuleRef module, LLVMBasicBlockRef block; LLVMBuilderRef builder; - ret_type = LLVMInt64Type(); vec_type = lp_build_vec_type(type); args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index 74b7393e4e..74b7393e4e 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index cee170ec83..2f41d620c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -47,7 +47,6 @@ #include "lp_tile_image.h" #include "lp_texture.h" #include "lp_setup.h" -#include "lp_tile_size.h" #include "state_tracker/sw_winsys.h" @@ -55,14 +54,18 @@ static INLINE boolean resource_is_texture(const struct pipe_resource *resource) { - const unsigned tex_binds = (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED | - PIPE_BIND_DEPTH_STENCIL | - PIPE_BIND_SAMPLER_VIEW); - const struct llvmpipe_resource *lpr = llvmpipe_resource_const(resource); - - return (lpr->base.bind & tex_binds) ? TRUE : FALSE; + switch (resource->target) { + case PIPE_BUFFER: + return FALSE; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_CUBE: + return TRUE; + default: + assert(0); + return FALSE; + } } @@ -81,7 +84,7 @@ alloc_layout_array(unsigned num_slices, unsigned width, unsigned height) assert(LP_TEX_LAYOUT_NONE == 0); /* calloc'ing LP_TEX_LAYOUT_NONE here */ return (enum lp_texture_layout *) - calloc(num_slices * tx * ty, sizeof(enum lp_texture_layout)); + CALLOC(num_slices * tx * ty, sizeof(enum lp_texture_layout)); } @@ -189,20 +192,20 @@ llvmpipe_resource_create(struct pipe_screen *_screen, assert(lpr->base.bind); - if (lpr->base.bind & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - /* displayable surface */ - if (!llvmpipe_displaytarget_layout(screen, lpr)) - goto fail; - assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); - } - else if (lpr->base.bind & (PIPE_BIND_SAMPLER_VIEW | - PIPE_BIND_DEPTH_STENCIL)) { - /* texture map */ - if (!llvmpipe_texture_layout(screen, lpr)) - goto fail; - assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); + if (resource_is_texture(&lpr->base)) { + if (lpr->base.bind & PIPE_BIND_DISPLAY_TARGET) { + /* displayable surface */ + if (!llvmpipe_displaytarget_layout(screen, lpr)) + goto fail; + assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); + } + else { + /* texture map */ + if (!llvmpipe_texture_layout(screen, lpr)) + goto fail; + assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); + } + assert(lpr->layout[0]); } else { /* other data (vertex buffer, const buffer, etc) */ @@ -217,10 +220,6 @@ llvmpipe_resource_create(struct pipe_screen *_screen, goto fail; } - if (resource_is_texture(&lpr->base)) { - assert(lpr->layout[0]); - } - lpr->id = id_counter++; return &lpr->base; @@ -242,6 +241,13 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, /* display target */ struct sw_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, lpr->dt); + + if (lpr->tiled[0].data) { + align_free(lpr->tiled[0].data); + lpr->tiled[0].data = NULL; + } + + FREE(lpr->layout[0]); } else if (resource_is_texture(pt)) { /* regular texture */ @@ -265,7 +271,7 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, /* free layout flag arrays */ for (level = 0; level < Elements(lpr->tiled); level++) { - free(lpr->layout[level]); + FREE(lpr->layout[level]); lpr->layout[level] = NULL; } } @@ -389,10 +395,7 @@ llvmpipe_resource_data(struct pipe_resource *resource) { struct llvmpipe_resource *lpr = llvmpipe_resource(resource); - assert((lpr->base.bind & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED | - PIPE_BIND_SAMPLER_VIEW)) == 0); + assert(!resource_is_texture(resource)); return lpr->data; } @@ -496,6 +499,27 @@ llvmpipe_get_transfer(struct pipe_context *pipe, assert(resource); assert(sr.level <= resource->last_level); + /* + * Transfers, like other pipe operations, must happen in order, so flush the + * context if necessary. + */ + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + boolean read_only = !(usage & PIPE_TRANSFER_WRITE); + boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK); + if (!llvmpipe_flush_resource(pipe, resource, + sr.face, sr.level, + 0, /* flush_flags */ + read_only, + TRUE, /* cpu_access */ + do_not_block)) { + /* + * It would have blocked, but state tracker requested no to. + */ + assert(do_not_block); + return NULL; + } + } + lpr = CALLOC_STRUCT(llvmpipe_transfer); if (lpr) { struct pipe_transfer *pt = &lpr->base; @@ -566,19 +590,6 @@ llvmpipe_transfer_map( struct pipe_context *pipe, lpr = llvmpipe_resource(transfer->resource); format = lpr->base.format; - /* - * Transfers, like other pipe operations, must happen in order, so flush the - * context if necessary. - */ - llvmpipe_flush_texture(pipe, - transfer->resource, - transfer->sr.face, - transfer->sr.level, - 0, /* flush_flags */ - !(transfer->usage & PIPE_TRANSFER_WRITE), /* read_only */ - TRUE, /* cpu_access */ - FALSE); /* do_not_flush */ - map = llvmpipe_resource_map(transfer->resource, transfer->sr.face, transfer->sr.level, @@ -994,14 +1005,16 @@ llvmpipe_get_texture_image(struct llvmpipe_resource *lpr, x * TILE_SIZE, y * TILE_SIZE, TILE_SIZE, TILE_SIZE, lpr->base.format, - lpr->row_stride[level]); + lpr->row_stride[level], + lpr->tiles_per_row[level]); } else { lp_tiled_to_linear(other_data, target_data, x * TILE_SIZE, y * TILE_SIZE, TILE_SIZE, TILE_SIZE, lpr->base.format, - lpr->row_stride[level]); + lpr->row_stride[level], + lpr->tiles_per_row[level]); } } @@ -1090,7 +1103,8 @@ llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpr, if (convert) { lp_tiled_to_linear(tiled_image, linear_image, x, y, TILE_SIZE, TILE_SIZE, lpr->base.format, - lpr->row_stride[level]); + lpr->row_stride[level], + lpr->tiles_per_row[level]); } if (new_layout != cur_layout) @@ -1138,7 +1152,8 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, if (convert) { lp_linear_to_tiled(linear_image, tiled_image, x, y, TILE_SIZE, TILE_SIZE, lpr->base.format, - lpr->row_stride[level]); + lpr->row_stride[level], + lpr->tiles_per_row[level]); } if (new_layout != cur_layout) @@ -1152,6 +1167,27 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, } +/** + * Return size of resource in bytes + */ +unsigned +llvmpipe_resource_size(const struct pipe_resource *resource) +{ + const struct llvmpipe_resource *lpr = llvmpipe_resource_const(resource); + unsigned lvl, size = 0; + + for (lvl = 0; lvl <= lpr->base.last_level; lvl++) { + if (lpr->linear[lvl].data) + size += tex_image_size(lpr, lvl, LP_TEX_LAYOUT_LINEAR); + + if (lpr->tiled[lvl].data) + size += tex_image_size(lpr, lvl, LP_TEX_LAYOUT_TILED); + } + + return size; +} + + void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen) { diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 858975bcee..a8d08d6247 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -31,12 +31,7 @@ #include "pipe/p_state.h" #include "util/u_debug.h" - - -#define LP_MAX_TEXTURE_2D_LEVELS 12 /* 2K x 2K for now */ -#define LP_MAX_TEXTURE_3D_LEVELS 10 /* 512 x 512 x 512 for now */ - -#define LP_MAX_TEXTURE_LEVELS LP_MAX_TEXTURE_2D_LEVELS +#include "lp_limits.h" enum lp_texture_usage @@ -189,6 +184,10 @@ void * llvmpipe_resource_data(struct pipe_resource *resource); +unsigned +llvmpipe_resource_size(const struct pipe_resource *resource); + + ubyte * llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr, unsigned face_slice, unsigned level, diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c index 0852150ba7..2b63992dd7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -122,14 +122,15 @@ tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride) /** * Convert a tiled image into a linear image. - * \param src_stride source row stride in bytes (bytes per row of tiles) * \param dst_stride dest row stride in bytes */ void lp_tiled_to_linear(const void *src, void *dst, unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, unsigned dst_stride) + enum pipe_format format, + unsigned dst_stride, + unsigned tiles_per_row) { assert(x % TILE_SIZE == 0); assert(y % TILE_SIZE == 0); @@ -191,8 +192,6 @@ lp_tiled_to_linear(const void *src, void *dst, const uint bpp = 4; const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; const uint bytes_per_tile = tile_w * tile_h * bpp; - const uint src_stride = dst_stride * tile_w; - const uint tiles_per_row = src_stride / bytes_per_tile; uint i, j; for (j = 0; j < height; j += tile_h) { @@ -202,7 +201,7 @@ lp_tiled_to_linear(const void *src, void *dst, uint byte_offset = tile_offset * bytes_per_tile; const uint8_t *src_tile = (uint8_t *) src + byte_offset; - lp_tile_write_4ub(format, + lp_tile_unswizzle_4ub(format, src_tile, dst, dst_stride, ii, jj, tile_w, tile_h); @@ -215,13 +214,14 @@ lp_tiled_to_linear(const void *src, void *dst, /** * Convert a linear image into a tiled image. * \param src_stride source row stride in bytes - * \param dst_stride dest row stride in bytes (bytes per row of tiles) */ void lp_linear_to_tiled(const void *src, void *dst, unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, unsigned src_stride) + enum pipe_format format, + unsigned src_stride, + unsigned tiles_per_row) { assert(x % TILE_SIZE == 0); assert(y % TILE_SIZE == 0); @@ -281,8 +281,6 @@ lp_linear_to_tiled(const void *src, void *dst, const uint bpp = 4; const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; const uint bytes_per_tile = tile_w * tile_h * bpp; - const uint dst_stride = src_stride * tile_w; - const uint tiles_per_row = dst_stride / bytes_per_tile; uint i, j; for (j = 0; j < height; j += TILE_SIZE) { @@ -292,7 +290,7 @@ lp_linear_to_tiled(const void *src, void *dst, uint byte_offset = tile_offset * bytes_per_tile; uint8_t *dst_tile = (uint8_t *) dst + byte_offset; - lp_tile_read_4ub(format, + lp_tile_swizzle_4ub(format, dst_tile, src, src_stride, ii, jj, tile_w, tile_h); @@ -320,10 +318,10 @@ test_tiled_linear_conversion(void *data, /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/ lp_linear_to_tiled(data, tiled, 0, 0, width, height, format, - stride); + stride, wt); lp_tiled_to_linear(tiled, data, 0, 0, width, height, format, - stride); + stride, wt); free(tiled); } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.h b/src/gallium/drivers/llvmpipe/lp_tile_image.h index d74621925d..8de8efc6c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.h @@ -33,14 +33,18 @@ void lp_tiled_to_linear(const void *src, void *dst, unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, unsigned dst_stride); + enum pipe_format format, + unsigned dst_stride, + unsigned tiles_per_row); void lp_linear_to_tiled(const void *src, void *dst, unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, unsigned src_stride); + enum pipe_format format, + unsigned src_stride, + unsigned tiles_per_row); void diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 9d6a88afec..07f71b8411 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -30,7 +30,7 @@ #include "pipe/p_compiler.h" #include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */ -#include "lp_tile_size.h" +#include "lp_limits.h" #ifdef __cplusplus extern "C" { @@ -51,7 +51,10 @@ tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; #define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * NUM_CHANNELS) //1024 -extern int tile_write_count, tile_read_count; +#ifdef DEBUG +extern unsigned lp_tile_unswizzle_count; +extern unsigned lp_tile_swizzle_count; +#endif /** @@ -73,14 +76,14 @@ tile_pixel_offset(unsigned x, unsigned y, unsigned c) void -lp_tile_read_4ub(enum pipe_format format, +lp_tile_swizzle_4ub(enum pipe_format format, uint8_t *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h); void -lp_tile_write_4ub(enum pipe_format format, +lp_tile_unswizzle_4ub(enum pipe_format format, const uint8_t *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 65810b6f8f..5ab63cbac6 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -58,7 +58,7 @@ def is_format_supported(format): channel = format.channels[i] if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT): return False - if channel.type == FLOAT and channel.size not in (32 ,64): + if channel.type == FLOAT and channel.size not in (16, 32 ,64): return False if format.colorspace not in ('rgb', 'srgb'): @@ -75,7 +75,7 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): src_native_type = native_type(format) print 'static void' - print 'lp_tile_%s_read_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type) + print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type) print '{' print ' unsigned x, y;' print ' const uint8_t *src_row = src + y0*src_stride;' @@ -193,7 +193,7 @@ def pack_rgba(format, src_channel, r, g, b, a): return expr -def emit_unrolled_write_code(format, src_channel): +def emit_unrolled_unswizzle_code(format, src_channel): '''Emit code for writing a block based on unrolled loops. This is considerably faster than the TILE_PIXEL-based code below. ''' @@ -223,7 +223,7 @@ def emit_unrolled_write_code(format, src_channel): print ' }' -def emit_tile_pixel_write_code(format, src_channel): +def emit_tile_pixel_unswizzle_code(format, src_channel): '''Emit code for writing a block based on the TILE_PIXEL macro.''' dst_native_type = native_type(format) @@ -257,7 +257,7 @@ def emit_tile_pixel_write_code(format, src_channel): value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) print ' *dst_pixel++ = %s;' % value - else: + elif dst_channel.size: print ' ++dst_pixel;' else: assert False @@ -273,7 +273,7 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): name = format.short_name() print 'static void' - print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) + print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) print '{' if format.layout == PLAIN \ and format.colorspace == 'rgb' \ @@ -282,14 +282,14 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): and not format.is_mixed() \ and (format.channels[0].type == UNSIGNED \ or format.channels[1].type == UNSIGNED): - emit_unrolled_write_code(format, src_channel) + emit_unrolled_unswizzle_code(format, src_channel) else: - emit_tile_pixel_write_code(format, src_channel) + emit_tile_pixel_unswizzle_code(format, src_channel) print '}' print -def generate_read(formats, dst_channel, dst_native_type, dst_suffix): +def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): '''Generate the dispatch function to read pixels from any format''' for format in formats: @@ -297,15 +297,17 @@ def generate_read(formats, dst_channel, dst_native_type, dst_suffix): generate_format_read(format, dst_channel, dst_native_type, dst_suffix) print 'void' - print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) + print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) print '{' print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type - print ' tile_read_count += 1;' + print '#ifdef DEBUG' + print ' lp_tile_swizzle_count += 1;' + print '#endif' print ' switch(format) {' for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_read_%s;' % (format.short_name(), dst_suffix) + print ' func = &lp_tile_%s_swizzle_%s;' % (format.short_name(), dst_suffix) print ' break;' print ' default:' print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' @@ -316,7 +318,7 @@ def generate_read(formats, dst_channel, dst_native_type, dst_suffix): print -def generate_write(formats, src_channel, src_native_type, src_suffix): +def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): '''Generate the dispatch function to write pixels to any format''' for format in formats: @@ -324,16 +326,18 @@ def generate_write(formats, src_channel, src_native_type, src_suffix): generate_format_write(format, src_channel, src_native_type, src_suffix) print 'void' - print 'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) + print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) print '{' print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type - print ' tile_write_count += 1;' + print '#ifdef DEBUG' + print ' lp_tile_unswizzle_count += 1;' + print '#endif' print ' switch(format) {' for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_write_%s;' % (format.short_name(), src_suffix) + print ' func = &lp_tile_%s_unswizzle_%s;' % (format.short_name(), src_suffix) print ' break;' print ' default:' print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' @@ -360,7 +364,10 @@ def main(): print '#include "util/u_half.h"' print '#include "lp_tile_soa.h"' print - print 'int tile_write_count=0, tile_read_count=0;' + print '#ifdef DEBUG' + print 'unsigned lp_tile_unswizzle_count = 0;' + print 'unsigned lp_tile_swizzle_count = 0;' + print '#endif' print print 'const unsigned char' print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {' @@ -388,8 +395,8 @@ def main(): native_type = 'uint8_t' suffix = '4ub' - generate_read(formats, channel, native_type, suffix) - generate_write(formats, channel, native_type, suffix) + generate_swizzle(formats, channel, native_type, suffix) + generate_unswizzle(formats, channel, native_type, suffix) if __name__ == '__main__': diff --git a/src/gallium/drivers/llvmpipe/sp2lp.sh b/src/gallium/drivers/llvmpipe/sp2lp.sh deleted file mode 100755 index c45a81ce3c..0000000000 --- a/src/gallium/drivers/llvmpipe/sp2lp.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh -# -# Port changes from softpipe to llvmpipe. Invoke as -# -# sp2lp.sh <commit> -# -# Note that this will only affect llvmpipe -- you still need to actually -# cherry-pick/merge the softpipe changes themselves if they affect directories -# outside src/gallium/drivers/softpipe - -git format-patch \ - --keep-subject \ - --relative=src/gallium/drivers/softpipe \ - --src-prefix=a/src/gallium/drivers/llvmpipe/ \ - --dst-prefix=b/src/gallium/drivers/llvmpipe/ \ - --stdout "$1^1..$1" \ -| sed \ - -e 's/\<softpipe\>/llvmpipe/g' \ - -e 's/\<sp\>/lp/g' \ - -e 's/\<softpipe_/llvmpipe_/g' \ - -e 's/\<sp_/lp_/g' \ - -e 's/\<SP_/LP_/g' \ - -e 's/\<SOFTPIPE_/LLVMPIPE_/g' \ - -e 's/\<spt\>/lpt/g' \ - -e 's/\<sps\>/lps/g' \ - -e 's/\<spfs\>/lpfs/g' \ - -e 's/\<sptex\>/lptex/g' \ - -e 's/\<setup_\(point\|line\|tri\)\>/llvmpipe_\0/g' \ - -e 's/\<llvmpipe_cached_tile\>/llvmpipe_cached_tex_tile/g' \ - -e 's/_get_cached_tile_tex\>/_get_cached_tex_tile/g' \ - -e 's/\<TILE_SIZE\>/TEX_TILE_SIZE/g' \ - -e 's/\<tile_address\>/tex_tile_address/g' \ - -e 's/\<tile->data\.color\>/tile->color/g' \ -| patch -p1 diff --git a/src/gallium/drivers/nouveau/SConscript b/src/gallium/drivers/nouveau/SConscript new file mode 100644 index 0000000000..fe7af4d2ae --- /dev/null +++ b/src/gallium/drivers/nouveau/SConscript @@ -0,0 +1,11 @@ +Import('*') + +env = env.Clone() + +nouveau = env.ConvenienceLibrary( + target = 'nouveau', + source = [ + 'nouveau_screen.c', + ]) + +Export('nouveau') diff --git a/src/gallium/drivers/nv50/SConscript b/src/gallium/drivers/nv50/SConscript new file mode 100644 index 0000000000..8625f92622 --- /dev/null +++ b/src/gallium/drivers/nv50/SConscript @@ -0,0 +1,26 @@ +Import('*') + +env = env.Clone() + +nv50 = env.ConvenienceLibrary( + target = 'nv50', + source = [ + 'nv50_buffer.c', + 'nv50_clear.c', + 'nv50_context.c', + 'nv50_draw.c', + 'nv50_miptree.c', + 'nv50_query.c', + 'nv50_program.c', + 'nv50_resource.c', + 'nv50_screen.c', + 'nv50_state.c', + 'nv50_state_validate.c', + 'nv50_surface.c', + 'nv50_tex.c', + 'nv50_transfer.c', + 'nv50_vbo.c', + 'nv50_push.c', + ]) + +Export('nv50') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b8b6b12120..0156ff95ff 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -3169,15 +3169,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (pc->p->type == PIPE_SHADER_FRAGMENT) nv50_fp_move_results(pc); - /* last insn must be long so it can have the exit bit set */ - if (!is_long(pc->p->exec_tail)) - convert_to_long(pc, pc->p->exec_tail); - else - if (is_immd(pc->p->exec_tail) || + if (!pc->p->exec_tail || + is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail) || is_control_flow(pc->p->exec_tail)) emit_nop(pc); + /* last insn must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ terminate_mbb(pc); @@ -4162,7 +4163,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) struct pipe_transfer *transfer; if (!p->data[0] && p->immd_nr) { - struct nouveau_resource *heap = nv50->screen->immd_heap[0]; + struct nouveau_resource *heap = nv50->screen->immd_heap; if (nouveau_resource_alloc(heap, p->immd_nr, p, &p->data[0])) { while (heap->next && heap->size < p->immd_nr) { @@ -4180,7 +4181,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) p->immd_nr, NV50_CB_PMISC); } - assert(p->param_nr <= 512); + assert(p->param_nr <= 16384); if (p->param_nr) { unsigned cb; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ad17991be9..2dd1042424 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -190,9 +190,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_grobj_free(&screen->tesla); nouveau_grobj_free(&screen->eng2d); nouveau_grobj_free(&screen->m2mf); - nouveau_resource_destroy(&screen->immd_heap[0]); - nouveau_resource_destroy(&screen->parm_heap[0]); - nouveau_resource_destroy(&screen->parm_heap[1]); + nouveau_resource_destroy(&screen->immd_heap); nouveau_screen_fini(&screen->base); FREE(screen); } @@ -242,7 +240,7 @@ nv50_screen_relocs(struct nv50_screen *screen) OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl); OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl); OUT_RELOC (chan, screen->constbuf_parm[i], - ((NV50_CB_PVP + i) << 16) | 0x0800, rl, 0, 0); + ((NV50_CB_PVP + i) << 16) | 0x0000, rl, 0, 0); } } @@ -411,7 +409,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, (NV50_CB_AUX << 16) | 0x0200); for (i = 0; i < 3; i++) { - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (256 * 4) * 4, + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (4096 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { nv50_screen_destroy(pscreen); @@ -420,14 +418,12 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl); OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl); - OUT_RING (chan, ((NV50_CB_PVP + i) << 16) | 0x0800); + /* CB_DEF_SET_SIZE value of 0x0000 means 65536 */ + OUT_RING (chan, ((NV50_CB_PVP + i) << 16) | 0x0000); } - if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) || - nouveau_resource_init(&screen->parm_heap[0], 0, 512) || - nouveau_resource_init(&screen->parm_heap[1], 0, 512)) - { - NOUVEAU_ERR("Error initialising constant buffers.\n"); + if (nouveau_resource_init(&screen->immd_heap, 0, 128)) { + NOUVEAU_ERR("Error initialising shader immediates heap.\n"); nv50_screen_destroy(pscreen); return NULL; } diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 40ebbee72e..fbf15a7596 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -20,8 +20,7 @@ struct nv50_screen { struct nouveau_bo *constbuf_misc[1]; struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES]; - struct nouveau_resource *immd_heap[1]; - struct nouveau_resource *parm_heap[PIPE_SHADER_TYPES]; + struct nouveau_resource *immd_heap; struct pipe_resource *strm_vbuf[16]; diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index c2d9e83526..d905d95354 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -31,6 +31,27 @@ #include "util/u_tile.h" #include "util/u_format.h" +/* return TRUE for formats that can be converted among each other by NV50_2D */ +static INLINE boolean +nv50_2d_format_faithful(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8X8_SRGB: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + return TRUE; + default: + return FALSE; + } +} + static INLINE int nv50_format(enum pipe_format format) { @@ -47,9 +68,12 @@ nv50_format(enum pipe_format format) return NV50_2D_DST_FORMAT_R5G6B5_UNORM; case PIPE_FORMAT_B5G5R5A1_UNORM: return NV50_2D_DST_FORMAT_A1R5G5B5_UNORM; + case PIPE_FORMAT_B10G10R10A2_UNORM: + return NV50_2D_DST_FORMAT_A2R10G10B10_UNORM; case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: return NV50_2D_DST_FORMAT_R8_UNORM; case PIPE_FORMAT_R32G32B32A32_FLOAT: return NV50_2D_DST_FORMAT_R32G32B32A32_FLOAT; @@ -178,7 +202,9 @@ nv50_surface_copy(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); struct nv50_screen *screen = nv50->screen; - assert(src->format == dest->format); + assert((src->format == dest->format) || + (nv50_2d_format_faithful(src->format) && + nv50_2d_format_faithful(dest->format))); nv50_surface_do_copy(screen, dest, destx, desty, src, srcx, srcy, width, height); diff --git a/src/gallium/drivers/nvfx/SConscript b/src/gallium/drivers/nvfx/SConscript new file mode 100644 index 0000000000..02d931b10e --- /dev/null +++ b/src/gallium/drivers/nvfx/SConscript @@ -0,0 +1,40 @@ +Import('*') + +env = env.Clone() + +env.PrependUnique(delete_existing=1, CPPPATH = [ + '#/src/gallium/drivers', +]) + +nvfx = env.ConvenienceLibrary( + target = 'nvfx', + source = [ + 'nv04_surface_2d.c', + 'nvfx_buffer.c', + 'nvfx_context.c', + 'nvfx_clear.c', + 'nvfx_draw.c', + 'nvfx_fragprog.c', + 'nvfx_fragtex.c', + 'nv30_fragtex.c', + 'nv40_fragtex.c', + 'nvfx_miptree.c', + 'nvfx_query.c', + 'nvfx_resource.c', + 'nvfx_screen.c', + 'nvfx_state.c', + 'nvfx_state_blend.c', + 'nvfx_state_emit.c', + 'nvfx_state_fb.c', + 'nvfx_state_rasterizer.c', + 'nvfx_state_scissor.c', + 'nvfx_state_stipple.c', + 'nvfx_state_viewport.c', + 'nvfx_state_zsa.c', + 'nvfx_surface.c', + 'nvfx_transfer.c', + 'nvfx_vbo.c', + 'nvfx_vertprog.c', + ]) + +Export('nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 4d7b7f181d..520bae5aed 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -8,6 +8,7 @@ #include "nvfx_resource.h" #include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_class.h" #include "nouveau/nouveau_pushbuf.h" #include "nouveau/nouveau_util.h" diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 5a8e00f15a..d3cd6bef96 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ r300_emit.c \ r300_flush.c \ r300_fs.c \ + r300_hyperz.c \ r300_query.c \ r300_render.c \ r300_resource.c \ diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 08aec427a1..3921085d76 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -21,6 +21,7 @@ r300 = env.ConvenienceLibrary( 'r300_emit.c', 'r300_flush.c', 'r300_fs.c', + 'r300_hyperz.c', 'r300_query.c', 'r300_render.c', 'r300_resource.c', diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index deaa03e1f6..e84bce0010 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -37,14 +37,27 @@ #include "r300_state_invariant.h" #include "r300_winsys.h" +#include <inttypes.h> + static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); struct r300_query* query, * temp; + struct r300_atom *atom; util_blitter_destroy(r300->blitter); draw_destroy(r300->draw); + /* Print stats, if enabled. */ + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { + fprintf(stderr, "r300: Stats for context %p:\n", r300); + fprintf(stderr, " : Flushes: %" PRIu64 "\n", r300->flush_counter); + foreach(atom, &r300->atom_list) { + fprintf(stderr, " : %s: %" PRIu64 " emits\n", + atom->name, atom->counter); + } + } + /* Free the OQ BO. */ context->screen->resource_destroy(context->screen, r300->oqbo); @@ -63,7 +76,6 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); FREE(r300->textures_state.state); - FREE(r300->vap_output_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300->fs_constants.state); @@ -112,7 +124,6 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(rs_block_state, 0); R300_INIT_ATOM(vertex_stream_state, 0); - R300_INIT_ATOM(vap_output_state, 6); R300_INIT_ATOM(pvs_flush, 2); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); @@ -136,7 +147,6 @@ static void r300_setup_atoms(struct r300_context* r300) r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); - r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 1e4fd9e5ed..e9c8fcdc15 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -45,6 +45,8 @@ struct r300_atom { struct r300_atom *prev, *next; /* Name, for debugging. */ const char* name; + /* Stat counter. */ + uint64_t counter; /* Opaque state. */ void* state; /* Emit the state to the context. */ @@ -117,6 +119,10 @@ struct r300_rs_state { }; struct r300_rs_block { + uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ + uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ + uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ + uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */ uint32_t count; /* R300_RS_COUNT */ uint32_t inst_count; /* R300_RS_INST_COUNT */ @@ -188,12 +194,6 @@ struct r300_vertex_stream_state { unsigned count; }; -struct r300_vap_output_state { - uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ - uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ - uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ -}; - struct r300_viewport_state { float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ @@ -255,6 +255,10 @@ struct r300_texture { /* A pitch for each mip-level */ unsigned pitch[R300_MAX_TEXTURE_LEVELS]; + /* A pitch multiplied by blockwidth as hardware wants + * the number of pixels instead of the number of blocks. */ + unsigned hwpitch[R300_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face based on the texture target */ unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; @@ -375,7 +379,7 @@ struct r300_context { struct r300_atom query_start; /* Rasterizer state. */ struct r300_atom rs_state; - /* RS block state. */ + /* RS block state + VAP (vertex shader) output mapping state. */ struct r300_atom rs_block_state; /* Scissor state. */ struct r300_atom scissor_state; @@ -383,8 +387,6 @@ struct r300_context { struct r300_atom textures_state; /* Vertex stream formatting state. */ struct r300_atom vertex_stream_state; - /* VAP (vertex shader) output mapping state. */ - struct r300_atom vap_output_state; /* Vertex shader. */ struct r300_atom vs_state; /* Vertex shader constant buffer. */ @@ -418,6 +420,9 @@ struct r300_context { struct pipe_viewport_state viewport; + /* Stream locations for SWTCL. */ + int stream_loc_notcl[16]; + /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; /* Whether polygon offset is enabled. */ @@ -435,6 +440,9 @@ struct r300_context { /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; + + /* Stat counter. */ + uint64_t flush_counter; }; /* Convenience cast wrapper. */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 456b2ec7b9..996a4f491e 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -104,6 +104,13 @@ cs_count--; \ } while (0) +#define OUT_CS_TABLE(values, count) do { \ + if (VERY_VERBOSE_REGISTERS) \ + DBG(cs_context_copy, DBG_CS, "r300: writing table of %d dwords\n", count); \ + cs_winsys->write_cs_table(cs_winsys, values, count); \ + cs_count -= count; \ +} while (0) + #define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ "domains (%d, %d, %d)\n", \ @@ -150,6 +157,9 @@ DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { \ + r300->flush_counter++; \ + } \ cs_winsys->flush_cs(cs_winsys); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 6e84bf8246..4c2836f36a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -38,9 +38,11 @@ static struct debug_option debug_options[] = { { "draw", DBG_DRAW, "Draw and emit (for debugging)" }, { "tex", DBG_TEX, "Textures (for debugging)" }, { "fall", DBG_FALL, "Fallbacks (for debugging)" }, + { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, + { "stats", DBG_STATS, "Gather statistics (for lulz)" }, { "all", ~0, "Convenience option that enables all debug flags" }, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 19acdaba62..23bbc6a99c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -83,7 +83,6 @@ void r300_emit_clip_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_clip_state* clip = (struct pipe_clip_state*)state; - int i; CS_LOCALS(r300); if (r300->screen->caps.has_tcl) { @@ -92,12 +91,7 @@ void r300_emit_clip_state(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_UCP_START : R300_PVS_UCP_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); - for (i = 0; i < 6; i++) { - OUT_CS_32F(clip->ucp[i][0]); - OUT_CS_32F(clip->ucp[i][1]); - OUT_CS_32F(clip->ucp[i][2]); - OUT_CS_32F(clip->ucp[i][3]); - } + OUT_CS_TABLE(clip->ucp, 6 * 4); OUT_CS_REG(R300_VAP_CLIP_CNTL, ((1 << clip->nr) - 1) | R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CS; @@ -106,7 +100,6 @@ void r300_emit_clip_state(struct r300_context* r300, OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); END_CS; } - } void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) @@ -244,8 +237,7 @@ void r300_emit_fs(struct r300_context* r300, unsigned size, void *state) OUT_CS_REG(R300_US_CODE_OFFSET, code->code_offset); OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4); - for(i = 0; i < 4; ++i) - OUT_CS(code->code_addr[i]); + OUT_CS_TABLE(code->code_addr, 4); OUT_CS_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); for (i = 0; i < code->alu.length; i++) @@ -265,8 +257,7 @@ void r300_emit_fs(struct r300_context* r300, unsigned size, void *state) if (code->tex.length) { OUT_CS_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); - for(i = 0; i < code->tex.length; ++i) - OUT_CS(code->tex.inst[i]); + OUT_CS_TABLE(code->tex.inst, code->tex.length); } /* Emit immediates. */ @@ -396,10 +387,7 @@ void r500_emit_fs(struct r300_context* r300, unsigned size, void *state) R500_GA_US_VECTOR_INDEX_TYPE_CONST | (i & R500_GA_US_VECTOR_INDEX_MASK)); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4); - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); + OUT_CS_TABLE(data, 4); } } } @@ -424,15 +412,9 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4); for(i = 0; i < count; ++i) { - const float *data; assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL); - data = buf->constants[i]; - - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); } + OUT_CS_TABLE(buf->constants, count * 4); END_CS; } @@ -459,10 +441,7 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo R500_GA_US_VECTOR_INDEX_TYPE_CONST | (i & R500_GA_US_VECTOR_INDEX_MASK)); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4); - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); + OUT_CS_TABLE(data, 4); } } END_CS; @@ -738,13 +717,20 @@ void r300_emit_rs_block_state(struct r300_context* r300, DBG(r300, DBG_DRAW, "r300: RS emit:\n"); BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_CS(rs->vap_vtx_state_cntl); + OUT_CS(rs->vap_vsm_vtx_assm); + OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_CS(rs->vap_out_vtx_fmt[0]); + OUT_CS(rs->vap_out_vtx_fmt[1]); + if (r300->screen->caps.is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); } else { OUT_CS_REG_SEQ(R300_RS_IP_0, count); } + OUT_CS_TABLE(rs->ip, count); for (i = 0; i < count; i++) { - OUT_CS(rs->ip[i]); DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]); } @@ -757,8 +743,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, } else { OUT_CS_REG_SEQ(R300_RS_INST_0, count); } + OUT_CS_TABLE(rs->inst, count); for (i = 0; i < count; i++) { - OUT_CS(rs->inst[i]); DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]); } @@ -823,7 +809,7 @@ void r300_emit_textures_state(struct r300_context *r300, END_CS; } -void r300_emit_aos(struct r300_context* r300, unsigned offset) +void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; @@ -832,9 +818,18 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) unsigned packet_size = (aos_count * 3 + 1) / 2; CS_LOCALS(r300); + for (i = 0; i < aos_count; i++) { + if ((vbuf[velem[i].vertex_buffer_index].buffer_offset + velem[i].src_offset) % 4 != 0) { + /* XXX We must align the buffer. */ + assert(0); + fprintf(stderr, "r300: Unaligned vertex buffer offsets aren't supported, aborting..\n"); + abort(); + } + } + BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(aos_count); + OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); for (i = 0; i < aos_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; @@ -899,39 +894,20 @@ void r300_emit_vertex_stream_state(struct r300_context* r300, BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); + OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count); for (i = 0; i < streams->count; i++) { - OUT_CS(streams->vap_prog_stream_cntl[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, streams->vap_prog_stream_cntl[i]); } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); + OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count); for (i = 0; i < streams->count; i++) { - OUT_CS(streams->vap_prog_stream_cntl_ext[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, streams->vap_prog_stream_cntl_ext[i]); } END_CS; } -void r300_emit_vap_output_state(struct r300_context* r300, - unsigned size, void* state) -{ - struct r300_vap_output_state *vap_out_state = - (struct r300_vap_output_state*)state; - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: VAP emit:\n"); - - BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(vap_out_state->vap_vtx_state_cntl); - OUT_CS(vap_out_state->vap_vsm_vtx_assm); - OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(vap_out_state->vap_out_vtx_fmt[0]); - OUT_CS(vap_out_state->vap_out_vtx_fmt[1]); - END_CS; -} - void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) { CS_LOCALS(r300); @@ -978,9 +954,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length); - for (i = 0; i < code->length; i++) { - OUT_CS(code->body.d[i]); - } + OUT_CS_TABLE(code->body.d, code->length); OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | R300_PVS_NUM_CNTLRS(pvs_num_controllers) | @@ -997,10 +971,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4); for (i = imm_first; i < imm_end; i++) { const float *data = vs->code.constants.Constants[i].u.Immediate; - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); + OUT_CS_TABLE(data, 4); } } END_CS; @@ -1009,7 +980,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) void r300_emit_vs_constants(struct r300_context* r300, unsigned size, void *state) { - unsigned i; unsigned count = ((struct r300_vertex_shader*)r300->vs_state.state)->externals_count; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; @@ -1023,13 +993,7 @@ void r300_emit_vs_constants(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_CONST_START : R300_PVS_CONST_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); - for (i = 0; i < count; i++) { - const float *data = buf->constants[i]; - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); - } + OUT_CS_TABLE(buf->constants, count * 4); END_CS; } @@ -1188,6 +1152,11 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) } } + /* emit_query_end is not atomized. */ + dwords += 26; + /* let's reserve some more, just in case */ + dwords += 32; + return dwords; } @@ -1200,6 +1169,9 @@ void r300_emit_dirty_state(struct r300_context* r300) foreach(atom, &r300->atom_list) { if (atom->dirty) { atom->emit(r300, atom->size, atom->state); + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { + atom->counter++; + } atom->dirty = FALSE; } } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 56f7318cdb..3c0edf6fdc 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -29,7 +29,7 @@ struct rX00_fragment_program_code; struct r300_vertex_program_code; -void r300_emit_aos(struct r300_context* r300, unsigned offset); +void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed); void r300_emit_blend_state(struct r300_context* r300, unsigned size, void* state); @@ -81,9 +81,6 @@ void r300_emit_vertex_buffer(struct r300_context* r300); void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_vap_output_state(struct r300_context* r300, - unsigned size, void* state); - void r300_emit_vs_constants(struct r300_context* r300, unsigned size, void *state); diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4d61f63853..88303f074c 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -275,6 +275,14 @@ static void r300_translate_fragment_shader( /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); + /* Shaders with zero instructions are invalid, + * use the dummy shader instead. */ + if (shader->code.code.r500.inst_end == -1) { + rc_destroy(&compiler.Base); + r300_dummy_fragment_shader(r300, shader); + return; + } + if (compiler.Base.Error) { fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader" " instead.\n", compiler.Base.ErrorMsg); diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c new file mode 100644 index 0000000000..b41b6b1508 --- /dev/null +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -0,0 +1,108 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + + +#include "r300_hyperz.h" +#include "r300_context.h" +#include "r300_reg.h" +#include "r300_fs.h" + +/*****************************************************************************/ +/* The ZTOP state */ +/*****************************************************************************/ + +static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when a new depth or stencil value + * can be written and changed. */ + + /* We might optionally check for [Z func: never] and inspect the stencil + * state in a similar fashion, but it's not terribly important. */ + return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || + (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || + ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && + (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); +} + +static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when alpha testing can kill + * a fragment. */ + uint32_t af = dsa->alpha_function; + + return (af & R300_FG_ALPHA_FUNC_ENABLE) && + (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; +} + +static void r300_update_ztop(struct r300_context* r300) +{ + struct r300_ztop_state* ztop_state = + (struct r300_ztop_state*)r300->ztop_state.state; + + /* This is important enough that I felt it warranted a comment. + * + * According to the docs, these are the conditions where ZTOP must be + * disabled: + * 1) Alpha testing enabled + * 2) Texture kill instructions in fragment shader + * 3) Chroma key culling enabled + * 4) W-buffering enabled + * + * The docs claim that for the first three cases, if no ZS writes happen, + * then ZTOP can be used. + * + * (3) will never apply since we do not support chroma-keyed operations. + * (4) will need to be re-examined (and this comment updated) if/when + * Hyper-Z becomes supported. + * + * Additionally, the following conditions require disabled ZTOP: + * 5) Depth writes in fragment shader + * 6) Outstanding occlusion queries + * + * This register causes stalls all the way from SC to CB when changed, + * but it is buffered on-chip so it does not hurt to write it if it has + * not changed. + * + * ~C. + */ + + /* ZS writes */ + if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */ + r300_fs(r300)->shader->info.uses_kill)) { /* (2) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { /* (6) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else { + ztop_state->z_buffer_top = R300_ZTOP_ENABLE; + } + + r300->ztop_state.dirty = TRUE; +} + +void r300_update_hyperz_state(struct r300_context* r300) +{ + r300_update_ztop(r300); +} diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h new file mode 100644 index 0000000000..3df5053b89 --- /dev/null +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -0,0 +1,30 @@ +/* + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_HYPERZ_H +#define R300_HYPERZ_H + +struct r300_context; + +void r300_update_hyperz_state(struct r300_context* r300); + +#endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 239f91443f..675a9317f9 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -3377,7 +3377,7 @@ enum { * the last block is omitted. */ #define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 - +# define R300_VC_FORCE_PREFETCH (1 << 5) # define R300_VBPNTR_SIZE0(x) ((x) >> 2) # define R300_VBPNTR_STRIDE0(x) (((x) >> 2) << 8) # define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 23b61df89c..7c3a7902a4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -41,9 +41,6 @@ #include "r300_render.h" #include "r300_state_derived.h" -/* XXX The DRM rejects VAP_ALT_NUM_VERTICES.. */ -//#define ENABLE_ALT_NUM_VERTS - static uint32_t r300_translate_primitive(unsigned prim) { switch (prim) { @@ -169,6 +166,24 @@ static boolean immd_is_good_idea(struct r300_context *r300, * after resolving fallback issues (e.g. stencil ref two-sided). * ****************************************************************************/ +static boolean r500_emit_index_offset(struct r300_context *r300, int indexBias) +{ + CS_LOCALS(r300); + + if (r300->screen->caps.is_r500 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + BEGIN_CS(2); + OUT_CS_REG(R500_VAP_INDEX_OFFSET, + (indexBias & 0xFFFFFF) | (indexBias < 0 ? 1<<24 : 0)); + END_CS; + } else { + if (indexBias) + return FALSE; /* Can't do anything :( */ + } + + return TRUE; +} + void r500_emit_draw_arrays_immediate(struct r300_context *r300, unsigned mode, unsigned start, @@ -220,10 +235,12 @@ void r500_emit_draw_arrays_immediate(struct r300_context *r300, dwords = 9 + count * vertex_size; - r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords); + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 2 + dwords); r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); + r500_emit_index_offset(r300, 0); + BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); @@ -265,23 +282,20 @@ void r500_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) { -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = count > 65535; -#else - boolean alt_num_verts = FALSE; -#endif CS_LOCALS(r300); + if (count >= (1 << 24)) { + fprintf(stderr, "r300: Got a huge number of vertices: %i, " + "refusing to render.\n", count); + return; + } + + r500_emit_index_offset(r300, 0); + + BEGIN_CS(7 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { - if (count >= (1 << 24)) { - fprintf(stderr, "r300: Got a huge number of vertices: %i, " - "refusing to render.\n", count); - return; - } - BEGIN_CS(9); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); - } else { - BEGIN_CS(7); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); @@ -307,11 +321,7 @@ void r500_emit_draw_elements(struct r300_context *r300, { uint32_t count_dwords; uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = count > 65535; -#else - boolean alt_num_verts = FALSE; -#endif CS_LOCALS(r300); if (count >= (1 << 24)) { @@ -320,18 +330,20 @@ void r500_emit_draw_elements(struct r300_context *r300, return; } - assert(indexBias == 0); - maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index); DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", count, minIndex, maxIndex); + if (!r500_emit_index_offset(r300, indexBias)) { + fprintf(stderr, "r300: Got a non-zero index bias, " + "refusing to render.\n"); + return; + } + + BEGIN_CS(13 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { - BEGIN_CS(15); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); - } else { - BEGIN_CS(13); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); @@ -541,12 +553,9 @@ void r300_draw_range_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct pipe_resource* orgIndexBuffer = indexBuffer; -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536; -#else - boolean alt_num_verts = FALSE; -#endif + count > 65536 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; if (r300->skip_rendering) { @@ -574,7 +583,7 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128); r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); - r300_emit_aos(r300, 0); + r300_emit_aos(r300, 0, TRUE); u_upload_flush(r300->upload_vb); u_upload_flush(r300->upload_ib); @@ -591,11 +600,12 @@ void r300_draw_range_elements(struct pipe_context* pipe, start += short_count; count -= short_count; - /* 16 spare dwords are enough for emit_draw_elements. */ - if (count && r300_reserve_cs_space(r300, 16)) { + /* 16 spare dwords are enough for emit_draw_elements. + * Also reserve some space for emit_query_end. */ + if (count && r300_reserve_cs_space(r300, 74)) { r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); - r300_emit_aos(r300, 0); + r300_emit_aos(r300, 0, TRUE); } } while (count); } @@ -622,12 +632,9 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536; -#else - boolean alt_num_verts = FALSE; -#endif + count > 65536 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; if (r300->skip_rendering) { @@ -650,20 +657,21 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_dirty_state(r300); if (alt_num_verts || count <= 65535) { - r300_emit_aos(r300, start); + r300_emit_aos(r300, start, FALSE); r300->emit_draw_arrays(r300, mode, count); } else { do { short_count = MIN2(count, 65535); - r300_emit_aos(r300, start); + r300_emit_aos(r300, start, FALSE); r300->emit_draw_arrays(r300, mode, short_count); start += short_count; count -= short_count; /* Again, we emit both AOS and draw_arrays so there should be - * at least 128 spare dwords. */ - if (count && r300_reserve_cs_space(r300, 128)) { + * at least 128 spare dwords. + * Also reserve some space for emit_query_end. */ + if (count && r300_reserve_cs_space(r300, 186)) { r300_emit_buffer_validate(r300, TRUE, NULL); r300_emit_dirty_state(r300); } @@ -896,6 +904,8 @@ static void r500_render_draw_arrays(struct vbuf_render* render, DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); + r500_emit_index_offset(r300, 0); + BEGIN_CS(2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | @@ -918,6 +928,8 @@ static void r500_render_draw(struct vbuf_render* render, r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); + r500_emit_index_offset(r300, 0); + BEGIN_CS(dwords); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8fc1d5aa00..c039126703 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "util/u_format.h" +#include "util/u_format_s3tc.h" #include "util/u_memory.h" #include "r300_context.h" @@ -319,6 +320,8 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) r300_init_screen_resource_functions(r300screen); + util_format_s3tc_init(); + return &r300screen->screen; } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 330bd9b36b..735c233c9e 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -71,6 +71,8 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { #define DBG_ANISOHQ 0x0000080 #define DBG_NO_TILING 0x0000100 #define DBG_NO_IMMD 0x0000200 +#define DBG_STATS 0x0000400 +#define DBG_RS 0x0000800 /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 9eb8539a65..d31e7c53f7 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -538,46 +538,12 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, } /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */ -static void r300_fb_update_tiling_flags(struct r300_context *r300, +static void r300_fb_set_tiling_flags(struct r300_context *r300, const struct pipe_framebuffer_state *old_state, const struct pipe_framebuffer_state *new_state) { struct r300_texture *tex; - unsigned i, j, level; - - /* Reset tiling flags for old surfaces to default values. */ - for (i = 0; i < old_state->nr_cbufs; i++) { - for (j = 0; j < new_state->nr_cbufs; j++) { - if (old_state->cbufs[i]->texture == new_state->cbufs[j]->texture) { - break; - } - } - /* If not binding the surface again... */ - if (j != new_state->nr_cbufs) { - continue; - } - - tex = r300_texture(old_state->cbufs[i]->texture); - - if (tex) { - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->macrotile); - } - } - if (old_state->zsbuf && - (!new_state->zsbuf || - old_state->zsbuf->texture != new_state->zsbuf->texture)) { - tex = r300_texture(old_state->zsbuf->texture); - - if (tex) { - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->macrotile); - } - } + unsigned i, level; /* Set tiling flags for new surfaces. */ for (i = 0; i < new_state->nr_cbufs; i++) { @@ -585,7 +551,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, level = new_state->cbufs[i]->level; r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[level], + tex->pitch[0], tex->microtile, tex->mip_macrotile[level]); } @@ -594,7 +560,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, level = new_state->zsbuf->level; r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[level], + tex->pitch[0], tex->microtile, tex->mip_macrotile[level]); } @@ -644,7 +610,8 @@ static void r300->dsa_state.dirty = TRUE; } - r300_fb_update_tiling_flags(r300, r300->fb_state.state, state); + /* The tiling flags are dependent on the surface miplevel, unfortunately. */ + r300_fb_set_tiling_flags(r300, r300->fb_state.state, state); memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); @@ -719,10 +686,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300_mark_fs_code_dirty(r300); r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ - - if (r300->vs_state.state && r300_vertex_shader_setup_wpos(r300)) { - r300->vap_output_state.dirty = TRUE; - } } /* Delete fragment shader state. */ @@ -1072,11 +1035,9 @@ r300_create_sampler_view(struct pipe_context *pipe, swizzle[2] = templ->swizzle_b; swizzle[3] = templ->swizzle_a; - /* XXX Enable swizzles when they become supported. Now we get RGBA - * everywhere. And do testing! */ view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, - 0); /*swizzle);*/ + swizzle); if (r300_screen(pipe->screen)->caps.is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } @@ -1271,6 +1232,7 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, { struct r300_vertex_element_state *velems; unsigned i, size; + enum pipe_format *format; assert(count <= PIPE_MAX_ATTRIBS); velems = CALLOC_STRUCT(r300_vertex_element_state); @@ -1279,21 +1241,88 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); if (r300_screen(pipe->screen)->caps.has_tcl) { - /* Check if the format is aligned to the size of DWORD. */ + r300_vertex_psc(velems); + + /* Check if the format is aligned to the size of DWORD. + * We only care about the blocksizes of the formats since + * swizzles are already set up. */ for (i = 0; i < count; i++) { - size = util_format_get_blocksize(attribs[i].src_format); + format = &velems->velem[i].src_format; + + /* Replace some formats with their aligned counterparts, + * this is OK because we check for aligned strides too. */ + switch (*format) { + /* Align to RGBA8. */ + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + *format = PIPE_FORMAT_R8G8B8A8_UNORM; + continue; + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + *format = PIPE_FORMAT_R8G8B8A8_SNORM; + continue; + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8G8B8_USCALED: + *format = PIPE_FORMAT_R8G8B8A8_USCALED; + continue; + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8G8B8_SSCALED: + *format = PIPE_FORMAT_R8G8B8A8_SSCALED; + continue; + + /* Align to RG16. */ + case PIPE_FORMAT_R16_UNORM: + *format = PIPE_FORMAT_R16G16_UNORM; + continue; + case PIPE_FORMAT_R16_SNORM: + *format = PIPE_FORMAT_R16G16_SNORM; + continue; + case PIPE_FORMAT_R16_USCALED: + *format = PIPE_FORMAT_R16G16_USCALED; + continue; + case PIPE_FORMAT_R16_SSCALED: + *format = PIPE_FORMAT_R16G16_SSCALED; + continue; + case PIPE_FORMAT_R16_FLOAT: + *format = PIPE_FORMAT_R16G16_FLOAT; + continue; + + /* Align to RGBA16. */ + case PIPE_FORMAT_R16G16B16_UNORM: + *format = PIPE_FORMAT_R16G16B16A16_UNORM; + continue; + case PIPE_FORMAT_R16G16B16_SNORM: + *format = PIPE_FORMAT_R16G16B16A16_SNORM; + continue; + case PIPE_FORMAT_R16G16B16_USCALED: + *format = PIPE_FORMAT_R16G16B16A16_USCALED; + continue; + case PIPE_FORMAT_R16G16B16_SSCALED: + *format = PIPE_FORMAT_R16G16B16A16_SSCALED; + continue; + case PIPE_FORMAT_R16G16B16_FLOAT: + *format = PIPE_FORMAT_R16G16B16A16_FLOAT; + continue; + + default:; + } + + size = util_format_get_blocksize(*format); if (size % 4 != 0) { /* XXX Shouldn't we align the format? */ fprintf(stderr, "r300_create_vertex_elements_state: " "Unaligned format %s:%i isn't supported\n", - util_format_name(attribs[i].src_format), size); + util_format_name(*format), size); assert(0); abort(); } } - r300_vertex_psc(velems); } } return velems; @@ -1359,14 +1388,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) } r300->vs_state.state = vs; - // VS output mapping for HWTCL or stream mapping for SWTCL to the RS block - if (r300->fs.state) { - r300_vertex_shader_setup_wpos(r300); - } - memcpy(r300->vap_output_state.state, &vs->vap_out, - sizeof(struct r300_vap_output_state)); - r300->vap_output_state.dirty = TRUE; - /* The majority of the RS block bits is dependent on the vertex shader. */ r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 46c192eae1..e3adace0fa 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -28,6 +28,7 @@ #include "r300_context.h" #include "r300_fs.h" +#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" #include "r300_state.h" @@ -42,6 +43,7 @@ enum r300_rs_swizzle { SWIZ_XYZW = 0, SWIZ_X001, SWIZ_XY01, + SWIZ_0001, }; static void r300_draw_emit_attrib(struct r300_context* r300, @@ -113,12 +115,11 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) static void r300_swtcl_vertex_psc(struct r300_context *r300) { struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state; - struct r300_vertex_shader* vs = r300->vs_state.state; - struct vertex_info* vinfo = &r300->vertex_info; + struct vertex_info* vinfo = &r300->vertex_info; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; - int* vs_output_tab = vs->stream_loc_notcl; + int* vs_output_tab = r300->stream_loc_notcl; /* XXX hax */ memset(vstream, 0, sizeof(struct r300_vertex_stream_state)); @@ -169,10 +170,10 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300) } static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, - boolean swizzle_0001) + enum r300_rs_swizzle swiz) { rs->ip[id] |= R300_RS_COL_PTR(ptr); - if (swizzle_0001) { + if (swiz == SWIZ_0001) { rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); } else { rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); @@ -218,10 +219,10 @@ static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) } static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, - boolean swizzle_0001) + enum r300_rs_swizzle swiz) { rs->ip[id] |= R500_RS_COL_PTR(ptr); - if (swizzle_0001) { + if (swiz == SWIZ_0001) { rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); } else { rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); @@ -267,21 +268,29 @@ static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) /* Set up the RS block. * - * This is the part of the chipset that actually does the rasterization - * of vertices into fragments. This is also the part of the chipset that - * locks up if any part of it is even slightly wrong. */ -static void r300_update_rs_block(struct r300_context* r300, - struct r300_shader_semantics* vs_outputs, - struct r300_shader_semantics* fs_inputs) + * This is the part of the chipset that is responsible for linking vertex + * and fragment shaders and stuffed texture coordinates. + * + * The rasterizer reads data from VAP, which produces vertex shader outputs, + * and GA, which produces stuffed texture coordinates. VAP outputs have + * precedence over GA. All outputs must be rasterized otherwise it locks up. + * If there are more outputs rasterized than is set in VAP/GA, it locks up + * too. The funky part is that this info has been pretty much obtained by trial + * and error. */ +static void r300_update_rs_block(struct r300_context *r300) { - struct r300_rs_block rs = { { 0 } }; - int i, col_count = 0, tex_count = 0, fp_offset = 0, count; - void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean); + struct r300_vertex_shader *vs = r300->vs_state.state; + struct r300_shader_semantics *vs_outputs = &vs->outputs; + struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs; + struct r300_rs_block rs = {0}; + int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0; + void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || vs_outputs->bcolor[1] != ATTR_UNUSED; + int *stream_loc_notcl = r300->stream_loc_notcl; if (r300->screen->caps.is_r500) { rX00_rs_col = r500_rs_col; @@ -295,18 +304,39 @@ static void r300_update_rs_block(struct r300_context* r300, rX00_rs_tex_write = r300_rs_tex_write; } - /* Rasterize colors. */ + /* The position is always present in VAP. */ + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + stream_loc_notcl[loc++] = 0; + + /* Set up the point size in VAP. */ + if (vs_outputs->psize != ATTR_UNUSED) { + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + stream_loc_notcl[loc++] = 1; + } + + /* Set up and rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || vs_outputs->color[1] != ATTR_UNUSED) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ - rX00_rs_col(&rs, col_count, i, FALSE); + /* Set up the color in VAP. */ + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + rs.vap_out_vtx_fmt[0] |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; + stream_loc_notcl[loc++] = 2 + i; + + /* Rasterize it. */ + rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->color[i] != ATTR_UNUSED) { rX00_rs_col_write(&rs, col_count, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, + "r300: Rasterized color %i written to FS.\n", i); + } else { + DBG(r300, DBG_RS, "r300: Rasterized color %i unused.\n", i); } col_count++; } else { @@ -314,26 +344,51 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->color[i] != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input color %i unassigned%s.\n", + i); } } } + /* Set up back-face colors. The rasterizer will do the color selection + * automatically. */ + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + stream_loc_notcl[loc++] = 4 + i; + } + } + /* Rasterize texture coordinates. */ - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ + if (!sprite_coord) { + /* Set up the texture coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + } + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, sprite_coord ? SWIZ_XY01 : SWIZ_XYZW); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->generic[i] != ATTR_UNUSED) { rX00_rs_tex_write(&rs, tex_count, fp_offset); - if (sprite_coord) - debug_printf("r300: SpriteCoord (generic index %i) is being written to reg %i\n", i, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, + "r300: Rasterized generic %i written to FS%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); + } else { + DBG(r300, DBG_RS, + "r300: Rasterized generic %i unused%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); } tex_count++; } else { @@ -341,20 +396,31 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->generic[i] != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); } } } /* Rasterize fog coordinates. */ - if (vs_outputs->fog != ATTR_UNUSED) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ + if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) { + /* Set up the fog coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->fog != ATTR_UNUSED) { rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, "r300: Rasterized fog written to FS.\n"); + } else { + DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n"); } tex_count++; } else { @@ -362,25 +428,47 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->fog != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n"); } } /* Rasterize WPOS. */ - /* If the FS doesn't need it, it's not written by the VS. */ - if (vs_outputs->wpos != ATTR_UNUSED && fs_inputs->wpos != ATTR_UNUSED) { + /* Don't set it in VAP if the FS doesn't need it. */ + if (fs_inputs->wpos != ATTR_UNUSED && tex_count < 8) { + /* Set up the WPOS coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + + /* Write it to the FS input register. */ rX00_rs_tex_write(&rs, tex_count, fp_offset); + DBG(r300, DBG_RS, "r300: Rasterized WPOS written to FS.\n"); + fp_offset++; tex_count++; } + /* Invalidate the rest of the no-TCL (GA) stream locations. */ + for (; loc < 16;) { + stream_loc_notcl[loc++] = -1; + } + /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { - rX00_rs_col(&rs, 0, 0, TRUE); + rX00_rs_col(&rs, 0, 0, SWIZ_0001); col_count++; + + DBG(r300, DBG_RS, "r300: Rasterized color 0 to prevent lockups.\n"); } + DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, " + "generics: %i.\n", col_count, tex_count); + rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; @@ -390,87 +478,8 @@ static void r300_update_rs_block(struct r300_context* r300, /* Now, after all that, see if we actually need to update the state. */ if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); - r300->rs_block_state.size = 5 + count*2; - } -} - -/* Update the shader-dependant states. */ -static void r300_update_derived_shader_state(struct r300_context* r300) -{ - struct r300_vertex_shader* vs = r300->vs_state.state; - - r300_update_rs_block(r300, &vs->outputs, &r300_fs(r300)->shader->inputs); -} - -static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) -{ - /* We are interested only in the cases when a new depth or stencil value - * can be written and changed. */ - - /* We might optionally check for [Z func: never] and inspect the stencil - * state in a similar fashion, but it's not terribly important. */ - return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || - (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || - ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && - (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); -} - -static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) -{ - /* We are interested only in the cases when alpha testing can kill - * a fragment. */ - uint32_t af = dsa->alpha_function; - - return (af & R300_FG_ALPHA_FUNC_ENABLE) && - (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; -} - -static void r300_update_ztop(struct r300_context* r300) -{ - struct r300_ztop_state* ztop_state = - (struct r300_ztop_state*)r300->ztop_state.state; - - /* This is important enough that I felt it warranted a comment. - * - * According to the docs, these are the conditions where ZTOP must be - * disabled: - * 1) Alpha testing enabled - * 2) Texture kill instructions in fragment shader - * 3) Chroma key culling enabled - * 4) W-buffering enabled - * - * The docs claim that for the first three cases, if no ZS writes happen, - * then ZTOP can be used. - * - * (3) will never apply since we do not support chroma-keyed operations. - * (4) will need to be re-examined (and this comment updated) if/when - * Hyper-Z becomes supported. - * - * Additionally, the following conditions require disabled ZTOP: - * 5) Depth writes in fragment shader - * 6) Outstanding occlusion queries - * - * This register causes stalls all the way from SC to CB when changed, - * but it is buffered on-chip so it does not hurt to write it if it has - * not changed. - * - * ~C. - */ - - /* ZS writes */ - if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && - (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */ - r300_fs(r300)->shader->info.uses_kill)) { /* (2) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { /* (6) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else { - ztop_state->z_buffer_top = R300_ZTOP_ENABLE; + r300->rs_block_state.size = 11 + count*2; } - - r300->ztop_state.dirty = TRUE; } static void r300_merge_textures_and_samplers(struct r300_context* r300) @@ -568,7 +577,7 @@ void r300_update_derived_state(struct r300_context* r300) } if (r300->rs_block_state.dirty) { - r300_update_derived_shader_state(r300); + r300_update_rs_block(r300); } if (r300->draw) { @@ -578,5 +587,5 @@ void r300_update_derived_state(struct r300_context* r300) r300_swtcl_vertex_psc(r300); } - r300_update_ztop(r300); + r300_update_hyperz_state(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 480d0f7c4a..fcbdb91b67 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -374,6 +374,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); + abort(); } switch (desc->channel[0].type) { @@ -395,6 +396,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); + abort(); } break; /* Unsigned ints */ @@ -418,12 +420,14 @@ r300_translate_vertex_data_type(enum pipe_format format) { fprintf(stderr, "r300: desc->channel[0].size == %d\n", desc->channel[0].size); assert(0); + abort(); } break; default: fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); + abort(); } if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { @@ -439,6 +443,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); + unsigned i, swizzle = 0; assert(format); @@ -448,11 +453,19 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { return 0; } - return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | - (0xf << R300_WRITE_ENA_SHIFT)); + for (i = 0; i < desc->nr_channels; i++) { + swizzle |= + MIN2(desc->swizzle[i], R300_SWIZZLE_SELECT_FP_ONE) << (3*i); + } + /* Set (0,0,0,1) in unused components. */ + for (; i < 3; i++) { + swizzle |= R300_SWIZZLE_SELECT_FP_ZERO << (3*i); + } + for (; i < 4; i++) { + swizzle |= R300_SWIZZLE_SELECT_FP_ONE << (3*i); + } + + return swizzle | (0xf << R300_WRITE_ENA_SHIFT); } #endif /* R300_STATE_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 64d1d18b45..cd9443fa26 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -43,15 +43,17 @@ void r300_emit_invariant_state(struct r300_context* r300, { CS_LOCALS(r300); + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + /* Subpixel multisampling for AA. */ + BEGIN_CS(4); + OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); + OUT_CS_REG(R300_GB_MSPOS1, 0x6666666); + END_CS; + } + BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 2 : 0)); /*** Graphics Backend (GB) ***/ - /* Subpixel multisampling for AA - * These are commented out because glisse's CS checker doesn't like them. - * I presume these will be re-enabled later. - * OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); - * OUT_CS_REG(R300_GB_MSPOS1, 0x6666666); - */ /* Source of fog depth */ OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 8bebeacf86..69e6a12445 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -34,9 +34,6 @@ #include "r300_screen.h" #include "r300_winsys.h" -/* XXX Enable float textures here. */ -/*#define ENABLE_FLOAT_TEXTURES*/ - #define TILE_WIDTH 0 #define TILE_HEIGHT 1 @@ -74,7 +71,7 @@ static boolean r300_format_is_plain(enum pipe_format format) * The FORMAT specifies how the texture sampler will treat the texture, and * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle) + const unsigned char *swizzle_view) { uint32_t result = 0; const struct util_format_description *desc; @@ -98,6 +95,7 @@ uint32_t r300_translate_texformat(enum pipe_format format, R300_TX_FORMAT_SIGNED_Z, R300_TX_FORMAT_SIGNED_W, }; + unsigned char swizzle[4]; desc = util_format_description(format); @@ -144,25 +142,18 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - /* Add swizzle. */ - if (!swizzle) { - swizzle = desc->swizzle; - } /*else { - if (swizzle[0] != desc->swizzle[0] || - swizzle[1] != desc->swizzle[1] || - swizzle[2] != desc->swizzle[2] || - swizzle[3] != desc->swizzle[3]) - { - const char n[6] = "RGBA01"; - fprintf(stderr, "Got different swizzling! Format: %c%c%c%c, " - "View: %c%c%c%c\n", - n[desc->swizzle[0]], n[desc->swizzle[1]], - n[desc->swizzle[2]], n[desc->swizzle[3]], - n[swizzle[0]], n[swizzle[1]], n[swizzle[2]], - n[swizzle[3]]); + /* Get swizzle. */ + if (swizzle_view) { + /* Compose two sets of swizzles. */ + for (i = 0; i < 4; i++) { + swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? + desc->swizzle[swizzle_view[i]] : swizzle_view[i]; } - }*/ + } else { + memcpy(swizzle, desc->swizzle, sizeof(swizzle)); + } + /* Add swizzle. */ for (i = 0; i < 4; i++) { switch (swizzle[i]) { case UTIL_FORMAT_SWIZZLE_X: @@ -316,7 +307,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, } return ~0; -#if defined(ENABLE_FLOAT_TEXTURES) case UTIL_FORMAT_TYPE_FLOAT: switch (desc->channel[0].size) { case 16: @@ -340,7 +330,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_32F_32F_32F_32F | result; } } -#endif } return ~0; /* Unsupported/unknown. */ @@ -405,16 +394,12 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: -#if defined(ENABLE_FLOAT_TEXTURES) case PIPE_FORMAT_R16G16B16A16_FLOAT: -#endif return R300_COLOR_FORMAT_ARGB16161616; /* 128-bit buffers. */ -#if defined(ENABLE_FLOAT_TEXTURES) case PIPE_FORMAT_R32G32B32A32_FLOAT: return R300_COLOR_FORMAT_ARGB32323232; -#endif /* YUV buffers. */ case PIPE_FORMAT_UYVY: @@ -532,7 +517,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_R10SG10SB10SA2U_NORM: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - //case PIPE_FORMAT_R16G16B16A16_FLOAT: /* not in pipe_format */ + case PIPE_FORMAT_R16G16B16A16_FLOAT: case PIPE_FORMAT_R32G32B32A32_FLOAT: return modifier | R300_C0_SEL_R | R300_C1_SEL_G | @@ -573,7 +558,7 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, if (tex->uses_pitch) { /* rectangles love this */ f->format0 |= R300_TX_PITCH_EN; - f->format2 = (tex->pitch[0] - 1) & 0x1fff; + f->format2 = (tex->hwpitch[0] - 1) & 0x1fff; } else { /* power of two textures (3D, mipmaps, and no pitch) */ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); @@ -614,7 +599,7 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, if (util_format_is_depth_or_stencil(tex->b.b.format)) { for (i = 0; i <= tex->b.b.last_level; i++) { tex->fb_state.depthpitch[i] = - tex->pitch[i] | + tex->hwpitch[i] | R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | R300_DEPTHMICROTILE(tex->microtile); } @@ -622,7 +607,7 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, } else { for (i = 0; i <= tex->b.b.last_level; i++) { tex->fb_state.colorpitch[i] = - tex->pitch[i] | + tex->hwpitch[i] | r300_translate_colorformat(tex->b.b.format) | R300_COLOR_TILE(tex->mip_macrotile[i]) | R300_COLOR_MICROTILE(tex->microtile); @@ -762,12 +747,12 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, struct r300_texture *tex) { - /* The kernels <= 2.6.34-rc3 compute the size of mipmapped 3D textures + /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures * incorrectly. This is a workaround to prevent CS from being rejected. */ unsigned i, size; - if (screen->rws->get_value(screen->rws, R300_VID_TEX3D_MIP_BUG) && + if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && tex->b.b.target == PIPE_TEXTURE_3D && tex->b.b.last_level > 0) { size = 0; @@ -813,6 +798,8 @@ static void r300_setup_miptree(struct r300_screen* screen, tex->size = tex->offset[i] + size; tex->layer_size[i] = layer_size; tex->pitch[i] = stride / util_format_get_blocksize(base->format); + tex->hwpitch[i] = + tex->pitch[i] * util_format_get_blockwidth(base->format); SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 453d42b188..ba79ec068a 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -28,7 +28,7 @@ struct r300_texture; uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle); + const unsigned char *swizzle_view); uint32_t r500_tx_format_msb_bit(enum pipe_format format); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index f6428ed760..89f39af976 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -108,11 +108,9 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */ case TGSI_OPCODE_IF: return RC_OPCODE_IF; /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */ - /* case TGSI_OPCODE_REP: return RC_OPCODE_REP; */ case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */ - /* case TGSI_OPCODE_ENDREP: return RC_OPCODE_ENDREP; */ /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */ /* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */ case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL; diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index bfab9c3b01..b7609bad81 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -94,94 +94,6 @@ static void r300_shader_read_vs_outputs( vs_outputs->wpos = i; } -/* This function sets up: - * - VAP mapping, which maps VS registers to output semantics and - * at the same time it indicates which attributes are enabled and should - * be rasterized. - * - Stream mapping to VS outputs if TCL is not present. */ -static void r300_init_vs_output_mapping(struct r300_vertex_shader* vs) -{ - struct r300_shader_semantics* vs_outputs = &vs->outputs; - struct r300_vap_output_state *vap_out = &vs->vap_out; - int *stream_loc = vs->stream_loc_notcl; - int i, gen_count, tabi = 0; - boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || - vs_outputs->bcolor[1] != ATTR_UNUSED; - - vap_out->vap_vtx_state_cntl = 0x5555; /* XXX this is classic Mesa bonghits */ - - /* Position. */ - if (vs_outputs->pos != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - - stream_loc[tabi++] = 0; - } else { - assert(0); - } - - /* Point size. */ - if (vs_outputs->psize != ATTR_UNUSED) { - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - - stream_loc[tabi++] = 1; - } - - /* Colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || - vs_outputs->color[1] != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; - - stream_loc[tabi++] = 2 + i; - } - } - - /* Back-face colors. */ - if (any_bcolor_used) { - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); - - stream_loc[tabi++] = 4 + i; - } - } - - /* Texture coordinates. */ - gen_count = 0; - for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); - - stream_loc[tabi++] = 6 + gen_count; - gen_count++; - } - } - - /* Fog coordinates. */ - if (gen_count < 8 && vs_outputs->fog != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); - - stream_loc[tabi++] = 6 + gen_count; - gen_count++; - } - - /* WPOS. */ - if (gen_count < 8) { - vs->wpos_tex_output = gen_count; - stream_loc[tabi++] = 6 + gen_count; - } else { - vs_outputs->wpos = ATTR_UNUSED; - } - - for (; tabi < 16;) { - stream_loc[tabi++] = -1; - } -} - static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) { struct r300_vertex_shader * vs = c->UserData; @@ -246,9 +158,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) } /* WPOS. */ - if (outputs->wpos != ATTR_UNUSED) { - c->code->outputs[outputs->wpos] = reg++; - } + c->code->outputs[outputs->wpos] = reg++; } static void r300_dummy_vertex_shader( @@ -286,7 +196,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, tgsi_scan_shader(tokens, &vs->info); r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - r300_init_vs_output_mapping(vs); /* Setup the compiler */ rc_init(&compiler.Base); @@ -307,16 +216,11 @@ void r300_translate_vertex_shader(struct r300_context* r300, r300_tgsi_to_rc(&ttr, tokens); - compiler.RequiredOutputs = - ~(~0 << (vs->info.num_outputs + - (vs->outputs.wpos != ATTR_UNUSED ? 1 : 0))); - + compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs + 1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; /* Insert the WPOS output. */ - if (vs->outputs.wpos != ATTR_UNUSED) { - rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); - } + rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); @@ -343,32 +247,3 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* And, finally... */ rc_destroy(&compiler.Base); } - -boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) -{ - struct r300_vertex_shader* vs = r300->vs_state.state; - struct r300_vap_output_state *vap_out = &vs->vap_out; - int tex_output = vs->wpos_tex_output; - uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; - - if (vs->outputs.wpos == ATTR_UNUSED) { - return FALSE; - } - - if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) { - /* Enable WPOS in VAP. */ - if (!(vap_out->vap_vsm_vtx_assm & tex_fmt)) { - vap_out->vap_vsm_vtx_assm |= tex_fmt; - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * tex_output)); - return TRUE; - } - } else { - /* Disable WPOS in VAP. */ - if (vap_out->vap_vsm_vtx_assm & tex_fmt) { - vap_out->vap_vsm_vtx_assm &= ~tex_fmt; - vap_out->vap_out_vtx_fmt[1] &= ~(4 << (3 * tex_output)); - return TRUE; - } - } - return FALSE; -} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 56bcc3b70b..57b3fbca0b 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -39,7 +39,6 @@ struct r300_vertex_shader { struct tgsi_shader_info info; struct r300_shader_semantics outputs; - struct r300_vap_output_state vap_out; /* Whether the shader was replaced by a dummy one due to a shader * compilation failure. */ @@ -49,12 +48,6 @@ struct r300_vertex_shader { unsigned externals_count; unsigned immediates_count; - /* Stream locations for SWTCL or if TCL is bypassed. */ - int stream_loc_notcl[16]; - - /* Output stream location for WPOS. */ - int wpos_tex_output; - /* HWTCL-specific. */ /* Machine code (if translated) */ struct r300_vertex_program_code code; @@ -67,7 +60,4 @@ void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs, const struct tgsi_token *tokens); -/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */ -boolean r300_vertex_shader_setup_wpos(struct r300_context* r300); - #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 2bd40176d1..1642981eaa 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -39,7 +39,7 @@ enum r300_value_id { R300_VID_GB_PIPES, R300_VID_Z_PIPES, R300_VID_SQUARE_TILING_SUPPORT, - R300_VID_TEX3D_MIP_BUG, + R300_VID_DRM_2_3_0, }; enum r300_reference_domain { /* bitfield */ @@ -119,6 +119,10 @@ struct r300_winsys_screen { /* Write a dword to the command buffer. */ void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); + /* Write a table of dwords to the command buffer. */ + void (*write_cs_table)(struct r300_winsys_screen* winsys, + const void *dwords, unsigned count); + /* Write a relocated dword to the command buffer. */ void (*write_cs_reloc)(struct r300_winsys_screen *winsys, struct r300_winsys_buffer *buf, diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index 5f130453c3..ae3f00f338 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -69,11 +69,6 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, util_pack_color(rgba, ps->format, &uc); sp_tile_cache_clear(softpipe->cbuf_cache[i], rgba, uc.ui); - -#if !TILE_CLEAR_OPTIMIZATION - /* non-cached surface */ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui); -#endif } } @@ -83,11 +78,6 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, cv = util_pack_z_stencil(ps->format, depth, stencil); sp_tile_cache_clear(softpipe->zsbuf_cache, zero, cv); - -#if !TILE_CLEAR_OPTIMIZATION - /* non-cached surface */ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv); -#endif } softpipe->dirty_render_cache = TRUE; diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 508fe8f764..5024fc8a81 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -104,3 +104,71 @@ softpipe_flush( struct pipe_context *pipe, *fence = NULL; } + +/** + * Flush context if necessary. + * + * Returns FALSE if it would have block, but do_not_block was set, TRUE + * otherwise. + * + * TODO: move this logic to an auxiliary library? + */ +boolean +softpipe_flush_resource(struct pipe_context *pipe, + struct pipe_resource *texture, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_block) +{ + unsigned referenced; + + referenced = pipe->is_resource_referenced(pipe, texture, face, level); + + if ((referenced & PIPE_REFERENCED_FOR_WRITE) || + ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { + + /* + * TODO: The semantics of these flush flags are too obtuse. They should + * disappear and the pipe driver should just ensure that all visible + * side-effects happen when they need to happen. + */ + if (referenced & PIPE_REFERENCED_FOR_WRITE) + flush_flags |= PIPE_FLUSH_RENDER_CACHE; + + if (referenced & PIPE_REFERENCED_FOR_READ) + flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + + if (cpu_access) { + /* + * Flush and wait. + */ + + struct pipe_fence_handle *fence = NULL; + + if (do_not_block) + return FALSE; + + pipe->flush(pipe, flush_flags, &fence); + + if (fence) { + /* + * This is for illustrative purposes only, as softpipe does not + * have fences. + */ + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + } else { + /* + * Just flush. + */ + + pipe->flush(pipe, flush_flags, NULL); + } + } + + return TRUE; +} diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h index 68d9b5fa83..cb97482a71 100644 --- a/src/gallium/drivers/softpipe/sp_flush.h +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -28,10 +28,23 @@ #ifndef SP_FLUSH_H #define SP_FLUSH_H +#include "pipe/p_compiler.h" + struct pipe_context; struct pipe_fence_handle; -void softpipe_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence); +void +softpipe_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); + +boolean +softpipe_flush_resource(struct pipe_context *pipe, + struct pipe_resource *texture, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_block); #endif diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 7b1e058ac8..8bb0294238 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" +#include "util/u_format.h" #include "util/u_format_s3tc.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -70,6 +71,8 @@ softpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; + case PIPE_CAP_SM3: + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: @@ -144,43 +147,77 @@ static boolean softpipe_is_format_supported( struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, - unsigned tex_usage, + unsigned bind, unsigned geom_flags ) { struct sw_winsys *winsys = softpipe_screen(screen)->winsys; + const struct util_format_description *format_desc; assert(target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); - switch(format) { - case PIPE_FORMAT_YUYV: - case PIPE_FORMAT_UYVY: + format_desc = util_format_description(format); + if (!format_desc) return FALSE; - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return util_format_s3tc_enabled; + if (bind & (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) { + if(!winsys->is_displaytarget_format_supported(winsys, bind, format)) + return FALSE; + } - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_NONE: - return FALSE; + if (bind & PIPE_BIND_RENDER_TARGET) { + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; - default: - break; + /* + * Although possible, it is unnatural to render into compressed or YUV + * surfaces. So disable these here to avoid going into weird paths + * inside the state trackers. + */ + if (format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + /* + * TODO: Unfortunately we cannot render into anything more than 32 bits + * because we encode color clear values into a 32bit word. + */ + if (format_desc->block.bits > 32) + return FALSE; } - if(tex_usage & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - if(!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) + if (bind & PIPE_BIND_DEPTH_STENCIL) { + if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) return FALSE; + + /* + * TODO: Unfortunately we cannot render into anything more than 32 bits + * because we encode depth and stencil clear values into a 32bit word. + */ + if (format_desc->block.bits > 32) + return FALSE; + + /* + * TODO: eliminate this restriction + */ + if (format == PIPE_FORMAT_Z32_FLOAT) + return FALSE; + } + + /* + * All other operations (sampling, transfer, etc). + */ + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + return util_format_s3tc_enabled; } - /* XXX: this is often a lie. Pull in logic from llvmpipe to fix. + /* + * Everything else should be supported by u_format. */ return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index c79f5fb05a..fbce9e042b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -248,7 +248,8 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, addr.bits.face, addr.bits.level, addr.bits.z, - PIPE_TRANSFER_READ, 0, 0, + PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, + 0, 0, u_minify(tc->texture->width0, addr.bits.level), u_minify(tc->texture->height0, addr.bits.level)); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 167b6b1161..7aa85559b2 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -39,6 +39,7 @@ #include "util/u_transfer.h" #include "sp_context.h" +#include "sp_flush.h" #include "sp_texture.h" #include "sp_screen.h" @@ -214,6 +215,35 @@ softpipe_resource_get_handle(struct pipe_screen *screen, /** + * Helper function to compute offset (in bytes) for a particular + * texture level/face/slice from the start of the buffer. + */ +static unsigned +sp_get_tex_image_offset(const struct softpipe_resource *spr, + unsigned level, unsigned face, unsigned zslice) +{ + const unsigned hgt = u_minify(spr->base.height0, level); + const unsigned nblocksy = util_format_get_nblocksy(spr->base.format, hgt); + unsigned offset = spr->level_offset[level]; + + if (spr->base.target == PIPE_TEXTURE_CUBE) { + assert(zslice == 0); + offset += face * nblocksy * spr->stride[level]; + } + else if (spr->base.target == PIPE_TEXTURE_3D) { + assert(face == 0); + offset += zslice * nblocksy * spr->stride[level]; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + return offset; +} + + +/** * Get a pipe_surface "view" into a texture resource. */ static struct pipe_surface * @@ -234,25 +264,12 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->format = pt->format; ps->width = u_minify(pt->width0, level); ps->height = u_minify(pt->height0, level); - ps->offset = spr->level_offset[level]; + ps->offset = sp_get_tex_image_offset(spr, level, face, zslice); ps->usage = usage; ps->face = face; ps->level = level; ps->zslice = zslice; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset += face * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) * - spr->stride[level]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - ps->offset += zslice * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) * - spr->stride[level]; - } - else { - assert(face == 0); - assert(zslice == 0); - } } return ps; } @@ -290,8 +307,8 @@ softpipe_get_transfer(struct pipe_context *pipe, unsigned usage, const struct pipe_box *box) { - struct softpipe_resource *sprex = softpipe_resource(resource); - struct softpipe_transfer *spr; + struct softpipe_resource *spr = softpipe_resource(resource); + struct softpipe_transfer *spt; assert(resource); assert(sr.level <= resource->last_level); @@ -301,33 +318,41 @@ softpipe_get_transfer(struct pipe_context *pipe, assert(box->y + box->height <= u_minify(resource->height0, sr.level)); assert(box->z + box->depth <= u_minify(resource->depth0, sr.level)); - spr = CALLOC_STRUCT(softpipe_transfer); - if (spr) { - struct pipe_transfer *pt = &spr->base; + /* + * Transfers, like other pipe operations, must happen in order, so flush the + * context if necessary. + */ + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + boolean read_only = !(usage & PIPE_TRANSFER_WRITE); + boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK); + if (!softpipe_flush_resource(pipe, resource, + sr.face, sr.level, + 0, /* flush_flags */ + read_only, + TRUE, /* cpu_access */ + do_not_block)) { + /* + * It would have blocked, but state tracker requested no to. + */ + assert(do_not_block); + return NULL; + } + } + + spt = CALLOC_STRUCT(softpipe_transfer); + if (spt) { + struct pipe_transfer *pt = &spt->base; enum pipe_format format = resource->format; - int nblocksy = util_format_get_nblocksy(resource->format, - u_minify(resource->height0, sr.level)); pipe_resource_reference(&pt->resource, resource); pt->sr = sr; pt->usage = usage; pt->box = *box; - pt->stride = sprex->stride[sr.level]; + pt->stride = spr->stride[sr.level]; - spr->offset = sprex->level_offset[sr.level]; - - if (resource->target == PIPE_TEXTURE_CUBE) { - spr->offset += sr.face * nblocksy * pt->stride; - } - else if (resource->target == PIPE_TEXTURE_3D) { - spr->offset += box->z * nblocksy * pt->stride; - } - else { - assert(sr.face == 0); - assert(box->z == 0); - } - - spr->offset += - box->y / util_format_get_blockheight(format) * spr->base.stride + + spt->offset = sp_get_tex_image_offset(spr, sr.level, sr.face, box->z); + + spt->offset += + box->y / util_format_get_blockheight(format) * spt->base.stride + box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); return pt; @@ -356,26 +381,24 @@ static void * softpipe_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct softpipe_transfer *sp_transfer = softpipe_transfer(transfer); - struct softpipe_resource *sp_resource = softpipe_resource(transfer->resource); + struct softpipe_transfer *spt = softpipe_transfer(transfer); + struct softpipe_resource *spr = softpipe_resource(transfer->resource); struct sw_winsys *winsys = softpipe_screen(pipe->screen)->winsys; uint8_t *map; /* resources backed by display target treated specially: */ - if (sp_resource->dt) { - map = winsys->displaytarget_map(winsys, - sp_resource->dt, - transfer->usage); + if (spr->dt) { + map = winsys->displaytarget_map(winsys, spr->dt, transfer->usage); } else { - map = sp_resource->data; + map = spr->data; } if (map == NULL) return NULL; else - return map + sp_transfer->offset; + return map + spt->offset; } @@ -412,26 +435,25 @@ softpipe_user_buffer_create(struct pipe_screen *screen, unsigned bytes, unsigned bind_flags) { - struct softpipe_resource *buffer; + struct softpipe_resource *spr; - buffer = CALLOC_STRUCT(softpipe_resource); - if(!buffer) + spr = CALLOC_STRUCT(softpipe_resource); + if (!spr) return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = screen; - buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */ - buffer->base.bind = bind_flags; - buffer->base.usage = PIPE_USAGE_IMMUTABLE; - buffer->base.flags = 0; - buffer->base.width0 = bytes; - buffer->base.height0 = 1; - buffer->base.depth0 = 1; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; + pipe_reference_init(&spr->base.reference, 1); + spr->base.screen = screen; + spr->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */ + spr->base.bind = bind_flags; + spr->base.usage = PIPE_USAGE_IMMUTABLE; + spr->base.flags = 0; + spr->base.width0 = bytes; + spr->base.height0 = 1; + spr->base.depth0 = 1; + spr->userBuffer = TRUE; + spr->data = ptr; + + return &spr->base; } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index d996c2a342..f4db6f6ef0 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -103,7 +103,7 @@ sp_create_tile_cache( struct pipe_context *pipe ) * However, it breaks clearing in other situations (such as in * progs/tests/drawbuffers, see bug 24402). */ -#if 0 && TILE_CLEAR_OPTIMIZATION +#if 0 /* set flags to indicate all the tiles are cleared */ memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); #endif @@ -155,7 +155,8 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, if (ps) { tc->transfer = pipe_get_transfer(pipe, ps->texture, ps->face, ps->level, ps->zslice, - PIPE_TRANSFER_READ_WRITE, + PIPE_TRANSFER_READ_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED, 0, 0, ps->width, ps->height); tc->depth_stencil = (ps->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || @@ -344,9 +345,7 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) } } -#if TILE_CLEAR_OPTIMIZATION sp_tile_cache_flush_clear(tc); -#endif } #if 0 @@ -448,13 +447,8 @@ sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba, tc->clear_val = clearValue; -#if TILE_CLEAR_OPTIMIZATION /* set flags to indicate all the tiles are cleared */ memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); -#else - /* disable the optimization */ - memset(tc->clear_flags, 0, sizeof(tc->clear_flags)); -#endif for (pos = 0; pos < NUM_ENTRIES; pos++) { struct softpipe_cached_tile *tile = tc->entries + pos; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 753d8c0daa..e03d53eb24 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -28,8 +28,6 @@ #ifndef SP_TILE_CACHE_H #define SP_TILE_CACHE_H -#define TILE_CLEAR_OPTIMIZATION 1 - #include "pipe/p_compiler.h" diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index 005996d05d..da33fae62f 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -277,12 +277,13 @@ svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl, ret = svga_hwtnl_simple_draw_range_elements( hwtnl, gen_buf, gen_size, + start, 0, count - 1, gen_prim, 0, - gen_nr, - start ); + gen_nr ); + if (ret) goto done; diff --git a/src/gallium/drivers/svga/svga_resource.c b/src/gallium/drivers/svga/svga_resource.c index 15258c1966..ba630582e5 100644 --- a/src/gallium/drivers/svga/svga_resource.c +++ b/src/gallium/drivers/svga/svga_resource.c @@ -14,7 +14,7 @@ svga_resource_create(struct pipe_screen *screen, if (template->target == PIPE_BUFFER) return svga_buffer_create(screen, template); else - return svga_resource_create(screen, template); + return svga_texture_create(screen, template); } @@ -26,7 +26,7 @@ svga_resource_from_handle(struct pipe_screen * screen, if (template->target == PIPE_BUFFER) return NULL; else - return svga_resource_from_handle(screen, template, whandle); + return svga_texture_from_handle(screen, template, whandle); } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index aeda3dcad5..9fc613da74 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -397,6 +397,7 @@ svga_screen_create(struct svga_winsys_screen *sws) screen->fence_finish = svga_fence_finish; svgascreen->sws = sws; + svga_screen_init_surface_functions(svgascreen); svga_init_screen_resource_functions(svgascreen); svgascreen->use_ps30 = diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index dfaab53aef..d34d68f535 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -43,7 +43,7 @@ svga_translate_vertex_format(enum pipe_format format) case PIPE_FORMAT_R32G32_FLOAT: return SVGA3D_DECLTYPE_FLOAT2; case PIPE_FORMAT_R32G32B32_FLOAT: return SVGA3D_DECLTYPE_FLOAT3; case PIPE_FORMAT_R32G32B32A32_FLOAT: return SVGA3D_DECLTYPE_FLOAT4; - case PIPE_FORMAT_A8R8G8B8_UNORM: return SVGA3D_DECLTYPE_D3DCOLOR; + case PIPE_FORMAT_B8G8R8A8_UNORM: return SVGA3D_DECLTYPE_D3DCOLOR; case PIPE_FORMAT_R8G8B8A8_USCALED: return SVGA3D_DECLTYPE_UBYTE4; case PIPE_FORMAT_R16G16_SSCALED: return SVGA3D_DECLTYPE_SHORT2; case PIPE_FORMAT_R16G16B16A16_SSCALED: return SVGA3D_DECLTYPE_SHORT4; @@ -52,14 +52,10 @@ svga_translate_vertex_format(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_SNORM: return SVGA3D_DECLTYPE_SHORT4N; case PIPE_FORMAT_R16G16_UNORM: return SVGA3D_DECLTYPE_USHORT2N; case PIPE_FORMAT_R16G16B16A16_UNORM: return SVGA3D_DECLTYPE_USHORT4N; - - /* These formats don't exist yet: - * - case PIPE_FORMAT_R10G10B10_USCALED: return SVGA3D_DECLTYPE_UDEC3; - case PIPE_FORMAT_R10G10B10_SNORM: return SVGA3D_DECLTYPE_DEC3N; + case PIPE_FORMAT_R10G10B10X2_USCALED: return SVGA3D_DECLTYPE_UDEC3; + case PIPE_FORMAT_R10G10B10X2_SNORM: return SVGA3D_DECLTYPE_DEC3N; case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2; case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4; - */ default: /* There are many formats without hardware support. This case diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index a6215c68cb..5133c70593 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -229,7 +229,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, - vbuffer->stride); + vbuffer->stride, vbuffer->max_index); translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 3d4f56a67b..7d7024c4a7 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -49,9 +49,7 @@ translate_opcode( case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; - case TGSI_OPCODE_ENDFOR: return SVGA3DOP_ENDLOOP; case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; - case TGSI_OPCODE_BGNFOR: return SVGA3DOP_LOOP; case TGSI_OPCODE_MAD: return SVGA3DOP_MAD; case TGSI_OPCODE_MAX: return SVGA3DOP_MAX; case TGSI_OPCODE_MIN: return SVGA3DOP_MIN; @@ -2686,7 +2684,6 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || - emit->info.opcode_count[TGSI_OPCODE_BGNFOR] >= 1 || emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 8216c06260..71ba1e909d 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1142,12 +1142,12 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, trace_dump_arg_end(); if (num_buffers) { - struct pipe_vertex_buffer *_buffers = malloc(num_buffers * sizeof(*_buffers)); + struct pipe_vertex_buffer *_buffers = MALLOC(num_buffers * sizeof(*_buffers)); memcpy(_buffers, buffers, num_buffers * sizeof(*_buffers)); for (i = 0; i < num_buffers; i++) _buffers[i].buffer = trace_resource_unwrap(tr_ctx, buffers[i].buffer); pipe->set_vertex_buffers(pipe, num_buffers, _buffers); - free(_buffers); + FREE(_buffers); } else { pipe->set_vertex_buffers(pipe, num_buffers, NULL); } diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c index eaa47df406..0dc8cca264 100644 --- a/src/gallium/drivers/trace/tr_drm.c +++ b/src/gallium/drivers/trace/tr_drm.c @@ -73,7 +73,7 @@ trace_drm_destroy(struct drm_api *_api) if (api->destroy) api->destroy(api); - free(tr_api); + FREE(tr_api); } struct drm_api * |