diff options
Diffstat (limited to 'src/gallium/drivers/i915')
29 files changed, 1334 insertions, 670 deletions
diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO index 94c428bebf..fba180064c 100644 --- a/src/gallium/drivers/i915/TODO +++ b/src/gallium/drivers/i915/TODO @@ -1,25 +1,30 @@ Random list of problems with i915g: +- Check if PIPE_CAP_BLEND_EQUATION_SEPARATE work, the code is there. + If not fix it! A simple task, good for beginners. + +- Add support for PIPE_CAP_POINT_SPRITE either via the draw module or directly + via the hardware, look at the classic driver, more advanced. + +- What does this button do? Figure out LIS7 with regards to depth offset. + - Dies with BadDrawable on GLXFBconfig changes/destruction. Makes piglit totally unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still broken, it doesn't update the viewport/get new buffers. -- Tends to hang the chip after a few minutes of openarena. Looks tiling related, - at the last frame rendered has tiling corruption over the complete frame. - - Kills the chip in 3D_PRIMITIVE LINELIST with mesa-demos/fbotexture in - wireframe mode. - -- Tiling is funny: If unlucky, it renders/samples all black. No clue yet what's - going on. Seems to depend on tiny details like whethever the sampler - relocation is fenced/unfenced (broken _with_ fenced reloc using tiling bits!). + wireframe mode. Changing the cullmode to cw from none mitigates the crash. As + does emitting only one line segment (2 indices) per 3D_PRIMITIVE command in + the batch. - Y-tiling is even more fun. i915c doesn't use it, maybe there's a reason? Texture sampling from Y-tiled buffers seems to work, though (save above problems). + RESOLVED: Y-tiling works with the render engine, but not with the blitter. + Use u_blitter and hw clears (PRIM3D_CLEAR_RECT). -- Need to validate buffers before usage. Currently do_exec on the batchbuffer - can fail with -ENOSPC. +- src/xvmc/i915_structs.h in xf86-video-intel has a few more bits of various + commands defined. Scavenge them and see what's useful. Other bugs can be found here: https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index 6e93da7620..ce2691b2fd 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -31,12 +31,15 @@ #include "i915_batchbuffer.h" -#define BEGIN_BATCH(dwords, relocs) \ - (i915_winsys_batchbuffer_check(i915->batch, dwords, relocs)) +#define BEGIN_BATCH(dwords) \ + (i915_winsys_batchbuffer_check(i915->batch, dwords)) #define OUT_BATCH(dword) \ i915_winsys_batchbuffer_dword(i915->batch, dword) +#define OUT_BATCH_F(f) \ + i915_winsys_batchbuffer_float(i915->batch, f) + #define OUT_RELOC(buf, usage, offset) \ i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, false) diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index d92b2ccb31..7855403478 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -41,11 +41,9 @@ i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch) static INLINE boolean i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch, - size_t dwords, - size_t relocs) + size_t dwords) { - return dwords * 4 <= i915_winsys_batchbuffer_space(batch) && - relocs <= (batch->max_relocs - batch->relocs); + return dwords * 4 <= i915_winsys_batchbuffer_space(batch); } static INLINE void @@ -57,6 +55,16 @@ i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch, } static INLINE void +i915_winsys_batchbuffer_float(struct i915_winsys_batchbuffer *batch, + float f) +{ + union { float f; unsigned int ui; } uif; + uif.f = f; + assert (i915_winsys_batchbuffer_space(batch) >= 4); + i915_winsys_batchbuffer_dword_unchecked(batch, uif.ui); +} + +static INLINE void i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch, unsigned dword) { @@ -71,10 +79,18 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch, { assert (i915_winsys_batchbuffer_space(batch) >= size); - memcpy(data, batch->ptr, size); + memcpy(batch->ptr, data, size); batch->ptr += size; } +static INLINE boolean +i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch, + struct i915_winsys_buffer **buffers, + int num_of_buffers) +{ + return batch->iws->validate_buffers(batch, buffers, num_of_buffers); +} + static INLINE int i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *buffer, diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index 97c2566515..baaed3767f 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -49,6 +49,11 @@ i915_fill_blit(struct i915_context *i915, I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); + if(!i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)) { + FLUSH_BATCH(NULL); + assert(i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)); + } + switch (cpp) { case 1: case 2: @@ -66,9 +71,9 @@ i915_fill_blit(struct i915_context *i915, return; } - if (!BEGIN_BATCH(6, 1)) { + if (!BEGIN_BATCH(6)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(6, 1)); + assert(BEGIN_BATCH(6)); } OUT_BATCH(CMD); OUT_BATCH(BR13); @@ -76,6 +81,8 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(((y + h) << 16) | (x + w)); OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); OUT_BATCH(color); + + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } void @@ -94,6 +101,7 @@ i915_copy_blit(struct i915_context *i915, unsigned CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; + struct i915_winsys_buffer *buffers[2] = {src_buffer, dst_buffer}; I915_DBG(DBG_BLIT, @@ -102,6 +110,11 @@ i915_copy_blit(struct i915_context *i915, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + if(!i915_winsys_validate_buffers(i915->batch, buffers, 2)) { + FLUSH_BATCH(NULL); + assert(i915_winsys_validate_buffers(i915->batch, buffers, 2)); + } + switch (cpp) { case 1: case 2: @@ -130,9 +143,9 @@ i915_copy_blit(struct i915_context *i915, */ assert (dst_pitch > 0 && src_pitch > 0); - if (!BEGIN_BATCH(8, 2)) { + if (!BEGIN_BATCH(8)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(8, 2)); + assert(BEGIN_BATCH(8)); } OUT_BATCH(CMD); OUT_BATCH(BR13); @@ -142,4 +155,6 @@ i915_copy_blit(struct i915_context *i915, OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset); + + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index 6d824a507a..4a97746e98 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -31,17 +31,118 @@ #include "util/u_clear.h" +#include "util/u_format.h" +#include "util/u_pack_color.h" #include "i915_context.h" +#include "i915_screen.h" +#include "i915_reg.h" +#include "i915_batch.h" +#include "i915_resource.h" +#include "i915_state.h" +void +i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil, + unsigned destx, unsigned desty, unsigned width, unsigned height) +{ + struct i915_context *i915 = i915_context(pipe); + uint32_t clear_params, clear_color, clear_depth, clear_stencil, + clear_color8888, packed_z_stencil; + union util_color u_color; + float f_depth = depth; + struct i915_texture *cbuf_tex, *depth_tex; + + cbuf_tex = depth_tex = NULL; + clear_params = 0; + + if (buffers & PIPE_CLEAR_COLOR) { + struct pipe_surface *cbuf = i915->framebuffer.cbufs[0]; + + clear_params |= CLEARPARAM_WRITE_COLOR; + cbuf_tex = i915_texture(cbuf->texture); + util_pack_color(rgba, cbuf->format, &u_color); + if (util_format_get_blocksize(cbuf_tex->b.b.format) == 4) + clear_color = u_color.ui; + else + clear_color = (u_color.ui & 0xffff) | (u_color.ui << 16); + + util_pack_color(rgba, PIPE_FORMAT_B8G8R8A8_UNORM, &u_color); + clear_color8888 = u_color.ui; + } else + clear_color = clear_color8888 = 0; + + clear_depth = clear_stencil = 0; + if (buffers & PIPE_CLEAR_DEPTH) { + struct pipe_surface *zbuf = i915->framebuffer.zsbuf; + + clear_params |= CLEARPARAM_WRITE_DEPTH; + depth_tex = i915_texture(zbuf->texture); + packed_z_stencil = util_pack_z_stencil(depth_tex->b.b.format, depth, stencil); + + if (util_format_get_blocksize(depth_tex->b.b.format) == 4) { + /* Avoid read-modify-write if there's no stencil. */ + if (buffers & PIPE_CLEAR_STENCIL + || depth_tex->b.b.format != PIPE_FORMAT_Z24_UNORM_S8_USCALED) { + clear_params |= CLEARPARAM_WRITE_STENCIL; + clear_stencil = packed_z_stencil & 0xff; + clear_depth = packed_z_stencil; + } else + clear_depth = packed_z_stencil & 0xffffff00; + } else { + clear_depth = (clear_depth & 0xffff) | (clear_depth << 16); + } + } + + if (i915->hardware_dirty) + i915_emit_hardware_state(i915); + + if (!BEGIN_BATCH(7 + 7)) { + FLUSH_BATCH(NULL); + + i915_emit_hardware_state(i915); + i915->vbo_flushed = 1; + + assert(BEGIN_BATCH(7 + 7)); + } + + OUT_BATCH(_3DSTATE_CLEAR_PARAMETERS); + OUT_BATCH(clear_params | CLEARPARAM_CLEAR_RECT); + OUT_BATCH(clear_color); + OUT_BATCH(clear_depth); + OUT_BATCH(clear_color8888); + OUT_BATCH_F(f_depth); + OUT_BATCH(clear_stencil); + + OUT_BATCH(_3DPRIMITIVE | PRIM3D_CLEAR_RECT | 5); + OUT_BATCH_F(destx + width); + OUT_BATCH_F(desty + height); + OUT_BATCH_F(destx); + OUT_BATCH_F(desty + height); + OUT_BATCH_F(destx); + OUT_BATCH_F(desty); +} /** * Clear the given buffers to the specified values. * No masking, no scissor (clear entire buffer). */ void -i915_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, - double depth, unsigned stencil) +i915_clear_blitter(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil) { util_clear(pipe, &i915_context(pipe)->framebuffer, buffers, rgba, depth, stencil); } + +void +i915_clear_render(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil) +{ + struct i915_context *i915 = i915_context(pipe); + + if (i915->dirty) + i915_update_derived(i915); + + i915_clear_emit(pipe, buffers, rgba, depth, stencil, + 0, 0, i915->framebuffer.width, i915->framebuffer.height); +} diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 847dd6dd47..7a98ef73c1 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -39,6 +39,9 @@ #include "pipe/p_screen.h" +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE) + + /* * Draw functions */ @@ -50,18 +53,17 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct i915_context *i915 = i915_context(pipe); struct draw_context *draw = i915->draw; void *mapped_indices = NULL; - unsigned i; + unsigned cbuf_dirty; - if (i915->dirty) - i915_update_derived(i915); /* - * Map vertex buffers + * Ack vs contants here, helps ipers a lot. */ - for (i = 0; i < i915->num_vertex_buffers; i++) { - void *buf = i915_buffer(i915->vertex_buffer[i].buffer)->data; - draw_set_mapped_vertex_buffer(draw, i, buf); - } + cbuf_dirty = i915->dirty & I915_NEW_VS_CONSTANTS; + i915->dirty &= ~I915_NEW_VS_CONSTANTS; + + if (i915->dirty) + i915_update_derived(i915); /* * Map index buffer, if present @@ -70,23 +72,21 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) mapped_indices = i915_buffer(i915->index_buffer.buffer)->data; draw_set_mapped_index_buffer(draw, mapped_indices); - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, - i915->current.constants[PIPE_SHADER_VERTEX], - (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * - 4 * sizeof(float))); + if (cbuf_dirty) { + if (i915->constants[PIPE_SHADER_VERTEX]) + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, + i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data, + (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + 4 * sizeof(float))); + else + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0); + } /* * Do the drawing */ draw_vbo(i915->draw, info); - /* - * unmap vertex/index buffers - */ - for (i = 0; i < i915->num_vertex_buffers; i++) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - if (mapped_indices) draw_set_mapped_index_buffer(draw, NULL); } @@ -103,6 +103,9 @@ static void i915_destroy(struct pipe_context *pipe) int i; draw_destroy(i915->draw); + + if (i915->blitter) + util_blitter_destroy(i915->blitter); if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); @@ -113,6 +116,11 @@ static void i915_destroy(struct pipe_context *pipe) } pipe_surface_reference(&i915->framebuffer.zsbuf, NULL); + /* unbind constant buffers */ + for (i = 0; i < PIPE_SHADER_TYPES; i++) { + pipe_resource_reference(&i915->constants[i], NULL); + } + FREE(i915); } @@ -132,16 +140,27 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->base.destroy = i915_destroy; - i915->base.clear = i915_clear; + if (i915_screen(screen)->debug.use_blitter) + i915->base.clear = i915_clear_blitter; + else + i915->base.clear = i915_clear_render; i915->base.draw_vbo = i915_draw_vbo; + /* init this before draw */ + util_slab_create(&i915->transfer_pool, sizeof(struct pipe_transfer), + 16, UTIL_SLAB_SINGLETHREADED); + + /* Batch stream debugging is a bit hacked up at the moment: + */ + i915->batch = i915->iws->batchbuffer_create(i915->iws); + /* * Create drawing context and plug our rendering stage into it. */ i915->draw = draw_create(&i915->base); assert(i915->draw); - if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { + if (!debug_get_option_i915_no_vbuf()) { draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); } else { draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); @@ -155,12 +174,19 @@ i915_create_context(struct pipe_screen *screen, void *priv) draw_install_aaline_stage(i915->draw, &i915->base); draw_install_aapoint_stage(i915->draw, &i915->base); + /* augmented draw pipeline clobbers state functions */ + i915_init_fixup_state_functions(i915); + + /* Create blitter last - calls state creation functions. */ + i915->blitter = util_blitter_create(&i915->base); + assert(i915->blitter); + i915->dirty = ~0; i915->hardware_dirty = ~0; - - /* Batch stream debugging is a bit hacked up at the moment: - */ - i915->batch = i915->iws->batchbuffer_create(i915->iws); + i915->immediate_dirty = ~0; + i915->dynamic_dirty = ~0; + i915->static_dirty = ~0; + i915->flush_dirty = 0; return &i915->base; } diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 7103a1b8c1..dacf50e870 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -37,6 +37,9 @@ #include "tgsi/tgsi_scan.h" +#include "util/u_slab.h" +#include "util/u_blitter.h" + struct i915_winsys; struct i915_winsys_buffer; @@ -134,7 +137,6 @@ struct i915_state unsigned immediate[I915_MAX_IMMEDIATE]; unsigned dynamic[I915_MAX_DYNAMIC]; - float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; /** number of constants passed in through a constant buffer */ uint num_user_constants[PIPE_SHADER_TYPES]; @@ -149,6 +151,15 @@ struct i915_state /** Describes the current hardware vertex layout */ struct vertex_info vertex_info; + /* static state (dst/depth buffer state) */ + struct i915_winsys_buffer *cbuf_bo; + unsigned cbuf_flags; + struct i915_winsys_buffer *depth_bo; + unsigned depth_flags; + unsigned dst_buf_vars; + uint32_t draw_offset; + uint32_t draw_size; + unsigned id; /* track lost context events */ }; @@ -175,7 +186,7 @@ struct i915_rasterizer_state { unsigned LIS7; unsigned sc[1]; - const struct pipe_rasterizer_state *templ; + struct pipe_rasterizer_state templ; union { float f; unsigned u; } ds[2]; }; @@ -212,21 +223,18 @@ struct i915_context { struct pipe_blend_color blend_color; struct pipe_stencil_ref stencil_ref; struct pipe_clip_state clip; - /* XXX unneded */ struct pipe_resource *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_index_buffer index_buffer; unsigned dirty; unsigned num_samplers; unsigned num_fragment_sampler_views; - unsigned num_vertex_buffers; struct i915_winsys_batchbuffer *batch; @@ -237,6 +245,35 @@ struct i915_context { struct i915_state current; unsigned hardware_dirty; + unsigned immediate_dirty : I915_MAX_IMMEDIATE; + unsigned dynamic_dirty : I915_MAX_DYNAMIC; + unsigned static_dirty : 4; + unsigned flush_dirty : 2; + + struct i915_winsys_buffer *validation_buffers[2 + 1 + I915_TEX_UNITS]; + int num_validation_buffers; + + struct util_slab_mempool transfer_pool; + + /** blitter/hw-clear */ + struct blitter_context* blitter; + + /** State tracking needed by u_blitter for save/restore. */ + void *saved_fs; + void (*saved_bind_fs_state)(struct pipe_context *pipe, void *shader); + void *saved_vs; + struct pipe_clip_state saved_clip; + struct i915_velems_state *saved_velems; + unsigned saved_nr_vertex_buffers; + struct pipe_vertex_buffer saved_vertex_buffers[PIPE_MAX_ATTRIBS]; + unsigned saved_nr_samplers; + void *saved_samplers[PIPE_MAX_SAMPLERS]; + void (*saved_bind_sampler_states)(struct pipe_context *pipe, + unsigned num, void **sampler); + unsigned saved_nr_sampler_views; + struct pipe_sampler_view *saved_sampler_views[PIPE_MAX_SAMPLERS]; + void (*saved_set_sampler_views)(struct pipe_context *pipe, + unsigned num, struct pipe_sampler_view **views); }; /* A flag for each state_tracker state object: @@ -253,9 +290,11 @@ struct i915_context { #define I915_NEW_DEPTH_STENCIL 0x200 #define I915_NEW_SAMPLER 0x400 #define I915_NEW_SAMPLER_VIEW 0x800 -#define I915_NEW_CONSTANTS 0x1000 -#define I915_NEW_VBO 0x2000 -#define I915_NEW_VS 0x4000 +#define I915_NEW_VS_CONSTANTS 0x1000 +#define I915_NEW_FS_CONSTANTS 0x2000 +#define I915_NEW_GS_CONSTANTS 0x4000 +#define I915_NEW_VBO 0x8000 +#define I915_NEW_VS 0x10000 /* Driver's internally generated state flags: @@ -272,7 +311,25 @@ struct i915_context { #define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) #define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) #define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) -#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) +#define I915_HW_INVARIANT (1<<(I915_MAX_CACHE+1)) +#define I915_HW_FLUSH (1<<(I915_MAX_CACHE+1)) + +/* hw flush handling */ +#define I915_FLUSH_CACHE 1 +#define I915_PIPELINE_FLUSH 2 + +/* split up static state */ +#define I915_DST_BUF_COLOR 1 +#define I915_DST_BUF_DEPTH 2 +#define I915_DST_VARS 4 +#define I915_DST_RECT 8 + +static INLINE +void i915_set_flush_dirty(struct i915_context *i915, unsigned flush) +{ + i915->hardware_dirty |= I915_HW_FLUSH; + i915->flush_dirty |= flush; +} /*********************************************************************** @@ -297,14 +354,20 @@ void i915_emit_hardware_state(struct i915_context *i915 ); /*********************************************************************** * i915_clear.c: */ -void i915_clear( struct pipe_context *pipe, unsigned buffers, const float *rgba, - double depth, unsigned stencil); +void i915_clear_blitter(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil); +void i915_clear_render(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil); +void i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil, + unsigned destx, unsigned desty, unsigned width, unsigned height); /*********************************************************************** * */ void i915_init_state_functions( struct i915_context *i915 ); +void i915_init_fixup_state_functions( struct i915_context *i915 ); void i915_init_flush_functions( struct i915_context *i915 ); void i915_init_string_functions( struct i915_context *i915 ); diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index d7150c99c4..c4eed473e9 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -46,12 +46,18 @@ static const struct debug_named_value debug_options[] = { }; unsigned i915_debug = 0; -boolean i915_tiling = TRUE; -void i915_debug_init(struct i915_screen *screen) +DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0) +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(i915_lie, "I915_LIE", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(i915_use_blitter, "I915_USE_BLITTER", FALSE) + +void i915_debug_init(struct i915_screen *is) { - i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0); - i915_tiling = !debug_get_bool_option("I915_NO_TILING", FALSE); + i915_debug = debug_get_option_i915_debug(); + is->debug.tiling = !debug_get_option_i915_no_tiling(); + is->debug.lie = debug_get_option_i915_lie(); + is->debug.use_blitter = debug_get_option_i915_use_blitter(); } @@ -948,7 +954,8 @@ i915_dump_dirty(struct i915_context *i915, const char *func) {I915_NEW_DEPTH_STENCIL, "depth_stencil"}, {I915_NEW_SAMPLER, "sampler"}, {I915_NEW_SAMPLER_VIEW, "sampler_view"}, - {I915_NEW_CONSTANTS, "constants"}, + {I915_NEW_VS_CONSTANTS, "vs_const"}, + {I915_NEW_FS_CONSTANTS, "fs_const"}, {I915_NEW_VBO, "vbo"}, {I915_NEW_VS, "vs"}, {0, NULL}, @@ -976,7 +983,7 @@ i915_dump_hardware_dirty(struct i915_context *i915, const char *func) {I915_HW_PROGRAM, "program"}, {I915_HW_CONSTANTS, "constants"}, {I915_HW_IMMEDIATE, "immediate"}, - {I915_HW_INVARIENT, "invarient"}, + {I915_HW_INVARIANT, "invariant"}, {0, NULL}, }; int i; diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h index 11af7662f0..fa60799d0c 100644 --- a/src/gallium/drivers/i915/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h @@ -46,7 +46,6 @@ struct i915_winsys_batchbuffer; #define DBG_CONSTANTS 0x20 extern unsigned i915_debug; -extern boolean i915_tiling; #ifdef DEBUG static INLINE boolean diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index a2c70b1199..b4e81147c4 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -39,34 +39,12 @@ static void i915_flush_pipe( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence ) { struct i915_context *i915 = i915_context(pipe); draw_flush(i915->draw); -#if 0 - /* Do we need to emit an MI_FLUSH command to flush the hardware - * caches? - */ - if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { - unsigned flush = MI_FLUSH; - - if (!(flags & PIPE_FLUSH_RENDER_CACHE)) - flush |= INHIBIT_FLUSH_RENDER_CACHE; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) - flush |= FLUSH_MAP_CACHE; - - if (!BEGIN_BATCH(1, 0)) { - FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(1, 0)); - } - OUT_BATCH( flush ); - } -#endif - if (i915->batch->map == i915->batch->ptr) { return; } @@ -74,7 +52,6 @@ static void i915_flush_pipe( struct pipe_context *pipe, /* If there are no flags, just flush pending commands to hardware: */ FLUSH_BATCH(fence); - i915->vbo_flushed = 1; I915_DBG(DBG_FLUSH, "%s: #####\n", __FUNCTION__); } @@ -93,5 +70,11 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) struct i915_winsys_batchbuffer *batch = i915->batch; batch->iws->batchbuffer_flush(batch, fence); + i915->vbo_flushed = 1; i915->hardware_dirty = ~0; + i915->immediate_dirty = ~0; + i915->dynamic_dirty = ~0; + i915->static_dirty = ~0; + /* kernel emits flushes in between batchbuffers */ + i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 25c53210be..b145b58be3 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -924,6 +924,14 @@ i915_translate_instructions(struct i915_fp_compile *p, tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_PROPERTY: + /* + * We only support one cbuf, but we still need to ignore the property + * correctly so we don't hit the assert at the end of the switch case. + */ + assert(parse.FullToken.FullProperty.Property.PropertyName == + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); + break; case TGSI_TOKEN_TYPE_DECLARATION: if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) { @@ -1166,15 +1174,24 @@ void i915_translate_fragment_program( struct i915_context *i915, struct i915_fragment_shader *fs) { - struct i915_fp_compile *p = i915_init_compile(i915, fs); + struct i915_fp_compile *p; const struct tgsi_token *tokens = fs->state.tokens; - i915_find_wpos_space(p); - #if 0 tgsi_dump(tokens, 0); #endif + /* hw doesn't seem to like empty frag programs, even when the depth write + * fixup gets emitted below - may that one is fishy, too? */ + if (fs->info.num_instructions == 1) { + i915_use_passthrough_shader(fs); + + return; + } + + p = i915_init_compile(i915, fs); + i915_find_wpos_space(p); + i915_translate_instructions(p, tokens); i915_fixup_depth_write(p); diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c index dd997e2cf4..85656cd784 100644 --- a/src/gallium/drivers/i915/i915_prim_emit.c +++ b/src/gallium/drivers/i915/i915_prim_emit.c @@ -144,15 +144,14 @@ emit_prim( struct draw_stage *stage, vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ assert(vertex_size >= 12); /* never smaller than 12 bytes */ - if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ - i915_update_derived( i915 ); i915_emit_hardware_state( i915 ); - if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) { assert(0); return; } diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index baebbc7bae..79db3b650e 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -181,6 +181,7 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) struct i915_winsys *iws = i915->iws; if (i915_render->vbo) { + iws->buffer_unmap(iws, i915_render->vbo); iws->buffer_destroy(iws, i915_render->vbo); /* * XXX If buffers where referenced then this should be done in @@ -208,6 +209,7 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, I915_NEW_VERTEX); + i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); } /** @@ -262,16 +264,13 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct i915_winsys *iws = i915->iws; if (i915->vbo_flushed) debug_printf("%s bad vbo flush occured stalling on hw\n", __FUNCTION__); #ifdef VBUF_MAP_BUFFER - i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_sw_offset; #else - (void)iws; return (unsigned char *)i915_render->vbo_ptr; #endif } @@ -288,7 +287,7 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, i915_render->vbo_max_index = max_index; i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1)); #ifdef VBUF_MAP_BUFFER - iws->buffer_unmap(iws, i915_render->vbo); + (void)iws; #else i915_render->map_used_start = i915_render->vertex_size * min_index; i915_render->map_used_end = i915_render->vertex_size * (max_index + 1); @@ -466,16 +465,15 @@ draw_arrays_fallback(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ - i915_update_derived(i915); i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { assert(0); goto out; } @@ -515,16 +513,15 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(2, 0)) { + if (!BEGIN_BATCH(2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ - i915_update_derived(i915); i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(2, 0)) { + if (!BEGIN_BATCH(2)) { assert(0); goto out; } @@ -636,16 +633,15 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ - i915_update_derived(i915); i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { assert(0); goto out; } @@ -684,6 +680,15 @@ static void i915_vbuf_render_destroy(struct vbuf_render *render) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct i915_winsys *iws = i915->iws; + + if (i915_render->vbo) { + i915->vbo = NULL; + iws->buffer_unmap(iws, i915_render->vbo); + iws->buffer_destroy(iws, i915_render->vbo); + } + FREE(i915_render); } diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h index 5e4e80ddf6..6fe032cdb6 100644 --- a/src/gallium/drivers/i915/i915_reg.h +++ b/src/gallium/drivers/i915/i915_reg.h @@ -148,6 +148,7 @@ /* p161 */ #define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) /* Dword 1 */ +#define CLASSIC_EARLY_DEPTH (1<<31) #define TEX_DEFAULT_COLOR_OGL (0<<30) #define TEX_DEFAULT_COLOR_D3D (1<<30) #define ZR_EARLY_DEPTH (1<<29) diff --git a/src/gallium/drivers/i915/i915_resource.c b/src/gallium/drivers/i915/i915_resource.c index 499233ceb9..7f52ba11d6 100644 --- a/src/gallium/drivers/i915/i915_resource.c +++ b/src/gallium/drivers/i915/i915_resource.c @@ -31,7 +31,6 @@ i915_resource_from_handle(struct pipe_screen * screen, void i915_init_resource_functions(struct i915_context *i915 ) { - i915->base.is_resource_referenced = u_default_is_resource_referenced; i915->base.get_transfer = u_get_transfer_vtbl; i915->base.transfer_map = u_transfer_map_vtbl; i915->base.transfer_flush_region = u_transfer_flush_region_vtbl; diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c b/src/gallium/drivers/i915/i915_resource_buffer.c index d3d6a6752a..d02c768703 100644 --- a/src/gallium/drivers/i915/i915_resource_buffer.c +++ b/src/gallium/drivers/i915/i915_resource_buffer.c @@ -60,6 +60,38 @@ i915_buffer_destroy(struct pipe_screen *screen, } +static struct pipe_transfer * +i915_get_transfer(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); + + if (transfer == NULL) + return NULL; + + transfer->resource = resource; + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; + + /* Note strides are zero, this is ok for buffers, but not for + * textures 2d & higher at least. + */ + return transfer; +} + +static void +i915_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct i915_context *i915 = i915_context(pipe); + util_slab_free(&i915->transfer_pool, transfer); +} + static void * i915_buffer_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) @@ -91,9 +123,8 @@ struct u_resource_vtbl i915_buffer_vtbl = { i915_buffer_get_handle, /* get_handle */ i915_buffer_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ + i915_get_transfer, /* get_transfer */ + i915_transfer_destroy, /* transfer_destroy */ i915_buffer_transfer_map, /* transfer_map */ u_default_transfer_flush_region, /* transfer_flush_region */ u_default_transfer_unmap, /* transfer_unmap */ @@ -115,8 +146,7 @@ i915_buffer_create(struct pipe_screen *screen, buf->b.vtbl = &i915_buffer_vtbl; pipe_reference_init(&buf->b.b.reference, 1); buf->b.b.screen = screen; - - buf->data = MALLOC(template->width0); + buf->data = align_malloc(template->width0, 16); buf->free_on_destroy = TRUE; if (!buf->data) diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index f19106f341..7816925d23 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -172,19 +172,22 @@ i915_texture_set_image_offset(struct i915_texture *tex, } static enum i915_winsys_buffer_tile -i915_texture_tiling(struct pipe_resource *pt) +i915_texture_tiling(struct i915_screen *is, struct i915_texture *tex) { - if (!i915_tiling) + if (!is->debug.tiling) return I915_TILE_NONE; - if (pt->target == PIPE_TEXTURE_1D) + if (tex->b.b.target == PIPE_TEXTURE_1D) return I915_TILE_NONE; - if (util_format_is_s3tc(pt->format)) + if (util_format_is_s3tc(tex->b.b.format)) /* XXX X-tiling might make sense */ return I915_TILE_NONE; - return I915_TILE_X; + if (is->debug.use_blitter) + return I915_TILE_X; + else + return I915_TILE_Y; } @@ -401,11 +404,7 @@ i915_texture_layout_3d(struct i915_texture *tex) static boolean i915_texture_layout(struct i915_texture * tex) { - struct pipe_resource *pt = &tex->b.b; - - tex->tiling = i915_texture_tiling(pt); - - switch (pt->target) { + switch (tex->b.b.target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: @@ -649,11 +648,7 @@ i945_texture_layout_cube(struct i915_texture *tex) static boolean i945_texture_layout(struct i915_texture * tex) { - struct pipe_resource *pt = &tex->b.b; - - tex->tiling = i915_texture_tiling(pt); - - switch (pt->target) { + switch (tex->b.b.target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: @@ -664,7 +659,7 @@ i945_texture_layout(struct i915_texture * tex) i945_texture_layout_3d(tex); break; case PIPE_TEXTURE_CUBE: - if (!util_format_is_s3tc(pt->format)) + if (!util_format_is_s3tc(tex->b.b.format)) i9x5_texture_layout_cube(tex); else i945_texture_layout_cube(tex); @@ -716,14 +711,16 @@ i915_texture_destroy(struct pipe_screen *screen, } static struct pipe_transfer * -i915_texture_get_transfer(struct pipe_context *context, +i915_texture_get_transfer(struct pipe_context *pipe, struct pipe_resource *resource, unsigned level, unsigned usage, const struct pipe_box *box) { + struct i915_context *i915 = i915_context(pipe); struct i915_texture *tex = i915_texture(resource); - struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer); + struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); + if (transfer == NULL) return NULL; @@ -737,6 +734,14 @@ i915_texture_get_transfer(struct pipe_context *context, return transfer; } +static void +i915_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct i915_context *i915 = i915_context(pipe); + util_slab_free(&i915->transfer_pool, transfer); +} + static void * i915_texture_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) @@ -754,6 +759,9 @@ i915_texture_transfer_map(struct pipe_context *pipe, assert(box->z == 0); offset = i915_texture_offset(tex, transfer->level, box->z); + /* TODO this is a sledgehammer */ + pipe->flush(pipe, NULL); + map = iws->buffer_map(iws, tex->buffer, (transfer->usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE); if (map == NULL) @@ -779,9 +787,8 @@ struct u_resource_vtbl i915_texture_vtbl = { i915_texture_get_handle, /* get_handle */ i915_texture_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ i915_texture_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ + i915_transfer_destroy, /* transfer_destroy */ i915_texture_transfer_map, /* transfer_map */ u_default_transfer_flush_region, /* transfer_flush_region */ i915_texture_transfer_unmap, /* transfer_unmap */ @@ -808,6 +815,8 @@ i915_texture_create(struct pipe_screen *screen, pipe_reference_init(&tex->b.b.reference, 1); tex->b.b.screen = screen; + tex->tiling = i915_texture_tiling(is, tex); + if (is->is_i945) { if (!i945_texture_layout(tex)) goto fail; diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index f66478e729..e62b609eb5 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -35,7 +35,6 @@ #include "i915_debug.h" #include "i915_context.h" #include "i915_screen.h" -#include "i915_surface.h" #include "i915_resource.h" #include "i915_winsys.h" #include "i915_public.h" @@ -99,59 +98,84 @@ i915_get_name(struct pipe_screen *screen) } static int -i915_get_param(struct pipe_screen *screen, enum pipe_cap param) +i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) { - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 8; + struct i915_screen *is = i915_screen(screen); + + switch (cap) { + /* Supported features (boolean caps). */ + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_NPOT_TEXTURES: - return 1; + case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */ + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_TWO_SIDED_STENCIL: return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; + + /* Features that should be supported (boolean caps). */ + /* XXX: Just test the code */ + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + /* XXX: No code but hw supports it */ case PIPE_CAP_POINT_SPRITE: + /* Also lie about these when asked to (needed for GLSL / GL 2.0) */ + return is->debug.lie ? 1 : 0; + + /* Unsupported features (boolean caps). */ + case PIPE_CAP_ARRAY_TEXTURES: + case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TIMER_QUERY: return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; + + /* Features we can lie about (boolean caps). */ + case PIPE_CAP_GLSL: case PIPE_CAP_OCCLUSION_QUERY: + return is->debug.lie ? 1 : 0; + + /* Texturing. */ + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 8; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: return 0; - case PIPE_CAP_TIMER_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return I915_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return I915_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return I915_MAX_TEXTURE_2D_LEVELS; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + + /* Fragment coordinate conventions. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - /* disable for now */ - return 0; + default: + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } static int -i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) +i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap cap) { switch(shader) { case PIPE_SHADER_VERTEX: - return draw_get_shader_param(shader, param); + return draw_get_shader_param(shader, cap); case PIPE_SHADER_FRAGMENT: break; default: @@ -159,7 +183,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha } /* XXX: these are just shader model 2.0 values, fix this! */ - switch(param) { + switch(cap) { case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: return 96; case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: @@ -192,15 +216,15 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha case PIPE_SHADER_CAP_SUBROUTINES: return 0; default: - assert(0); + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } static float -i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param) +i915_get_paramf(struct pipe_screen *screen, enum pipe_cap cap) { - switch (param) { + switch(cap) { case PIPE_CAP_MAX_LINE_WIDTH: /* fall-through */ case PIPE_CAP_MAX_LINE_WIDTH_AA: @@ -218,6 +242,7 @@ i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param) return 16.0; default: + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } @@ -227,8 +252,7 @@ i915_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { static const enum pipe_format tex_supported[] = { PIPE_FORMAT_B8G8R8A8_UNORM, @@ -295,24 +319,23 @@ i915_fence_reference(struct pipe_screen *screen, is->iws->fence_reference(is->iws, ptr, fence); } -static int +static boolean i915_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct i915_screen *is = i915_screen(screen); - return is->iws->fence_signalled(is->iws, fence); + return is->iws->fence_signalled(is->iws, fence) == 0; } -static int +static boolean i915_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct i915_screen *is = i915_screen(screen); - return is->iws->fence_finish(is->iws, fence); + return is->iws->fence_finish(is->iws, fence) == 0; } @@ -322,6 +345,20 @@ i915_fence_finish(struct pipe_screen *screen, static void +i915_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_resource *resource, + unsigned level, unsigned layer, + void *winsys_drawable_handle) +{ + /* XXX: Dummy right now. */ + (void)screen; + (void)resource; + (void)level; + (void)layer; + (void)winsys_drawable_handle; +} + +static void i915_destroy_screen(struct pipe_screen *screen) { struct i915_screen *is = i915_screen(screen); @@ -372,6 +409,7 @@ i915_screen_create(struct i915_winsys *iws) is->base.winsys = NULL; is->base.destroy = i915_destroy_screen; + is->base.flush_frontbuffer = i915_flush_frontbuffer; is->base.get_name = i915_get_name; is->base.get_vendor = i915_get_vendor; diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h index 0c4186c68e..cfc585b535 100644 --- a/src/gallium/drivers/i915/i915_screen.h +++ b/src/gallium/drivers/i915/i915_screen.h @@ -45,16 +45,12 @@ struct i915_screen struct i915_winsys *iws; boolean is_i945; -}; - -/** - * Subclass of pipe_transfer - */ -struct i915_transfer -{ - struct pipe_transfer base; - unsigned offset; + struct { + boolean tiling; + boolean lie; + boolean use_blitter; + } debug; }; @@ -69,11 +65,5 @@ i915_screen(struct pipe_screen *pscreen) return (struct i915_screen *) pscreen; } -static INLINE struct i915_transfer * -i915_transfer(struct pipe_transfer *transfer) -{ - return (struct i915_transfer *)transfer; -} - #endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index bbfcff6bc4..1b57c5776f 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -33,6 +33,7 @@ #include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" #include "i915_context.h" @@ -57,10 +58,8 @@ translate_wrap_mode(unsigned wrap) return TEXCOORDMODE_CLAMP_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; - /* - case PIPE_TEX_WRAP_MIRRORED_REPEAT: + case PIPE_TEX_WRAP_MIRROR_REPEAT: return TEXCOORDMODE_MIRROR; - */ default: return TEXCOORDMODE_WRAP; } @@ -288,6 +287,17 @@ i915_create_sampler_state(struct pipe_context *pipe, return cso; } +static void i915_fixup_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->saved_nr_samplers = num; + memcpy(&i915->saved_samplers, sampler, sizeof(void *) * num); + + i915->saved_bind_sampler_states(pipe, num, sampler); +} + static void i915_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { @@ -467,6 +477,17 @@ i915_create_fs_state(struct pipe_context *pipe, } static void +i915_fixup_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->saved_fs = shader; + + i915->saved_bind_fs_state(pipe, shader); +} + +static void i915_bind_fs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); @@ -506,6 +527,8 @@ static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); + i915->saved_vs = shader; + /* just pass-through to draw module */ draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); @@ -525,32 +548,74 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, struct pipe_resource *buf) { struct i915_context *i915 = i915_context(pipe); - draw_flush(i915->draw); + unsigned new_num = 0; + boolean diff = TRUE; - /* Make a copy of shader constants. - * During fragment program translation we may add additional - * constants to the array. - * - * We want to consider the situation where some user constants - * (ex: a material color) may change frequently but the shader program - * stays the same. In that case we should only be updating the first - * N constants, leaving any extras from shader translation alone. - */ + + /* XXX don't support geom shaders now */ + if (shader == PIPE_SHADER_GEOMETRY) + return; + + /* if we have a new buffer compare it with the old one */ if (buf) { - struct i915_buffer *ir = i915_buffer(buf); - memcpy(i915->current.constants[shader], ir->data, ir->b.b.width0); - i915->current.num_user_constants[shader] = (ir->b.b.width0 / - 4 * sizeof(float)); - } - else { - i915->current.num_user_constants[shader] = 0; + struct i915_buffer *ibuf = i915_buffer(buf); + struct pipe_resource *old_buf = i915->constants[shader]; + struct i915_buffer *old = old_buf ? i915_buffer(old_buf) : NULL; + unsigned old_num = i915->current.num_user_constants[shader]; + + new_num = ibuf->b.b.width0 / 4 * sizeof(float); + + if (old_num == new_num) { + if (old_num == 0) + diff = FALSE; +#if 0 + /* XXX no point in running this code since st/mesa only uses user buffers */ + /* Can't compare the buffer data since they are userbuffers */ + else if (old && old->free_on_destroy) + diff = memcmp(old->data, ibuf->data, ibuf->b.b.width0); +#else + (void)old; +#endif + } + } else { + diff = i915->current.num_user_constants[shader] != 0; } + /* + * flush before updateing the state. + */ + if (diff && shader == PIPE_SHADER_FRAGMENT) + draw_flush(i915->draw); + + pipe_resource_reference(&i915->constants[shader], buf); + i915->current.num_user_constants[shader] = new_num; - i915->dirty |= I915_NEW_CONSTANTS; + if (diff) + i915->dirty |= shader == PIPE_SHADER_VERTEX ? I915_NEW_VS_CONSTANTS : I915_NEW_FS_CONSTANTS; } +static void +i915_fixup_set_fragment_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) +{ + struct i915_context *i915 = i915_context(pipe); + int i; + + for (i = 0; i < num; i++) + pipe_sampler_view_reference(&i915->saved_sampler_views[i], + views[i]); + + for (i = num; i < i915->saved_nr_sampler_views; i++) + pipe_sampler_view_reference(&i915->saved_sampler_views[i], + NULL); + + i915->saved_nr_sampler_views = num; + + i915->saved_set_sampler_views(pipe, num, views); +} + static void i915_set_fragment_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -622,7 +687,8 @@ static void i915_set_framebuffer_state(struct pipe_context *pipe, i915->framebuffer.height = fb->height; i915->framebuffer.nr_cbufs = fb->nr_cbufs; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&i915->framebuffer.cbufs[i], fb->cbufs[i]); + pipe_surface_reference(&i915->framebuffer.cbufs[i], + i < fb->nr_cbufs ? fb->cbufs[i] : NULL); } pipe_surface_reference(&i915->framebuffer.zsbuf, fb->zsbuf); @@ -637,6 +703,8 @@ static void i915_set_clip_state( struct pipe_context *pipe, struct i915_context *i915 = i915_context(pipe); draw_flush(i915->draw); + i915->saved_clip = *clip; + draw_set_clip_state(i915->draw, clip); i915->dirty |= I915_NEW_CLIP; @@ -667,7 +735,7 @@ i915_create_rasterizer_state(struct pipe_context *pipe, { struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state ); - cso->templ = rasterizer; + cso->templ = *rasterizer; cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; cso->light_twoside = rasterizer->light_twoside; cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE; @@ -738,7 +806,7 @@ static void i915_bind_rasterizer_state( struct pipe_context *pipe, /* pass-through to draw module */ draw_set_rasterizer_state(i915->draw, - (i915->rasterizer ? i915->rasterizer->templ : NULL), + (i915->rasterizer ? &(i915->rasterizer->templ) : NULL), raster); i915->dirty |= I915_NEW_RASTERIZER; @@ -755,16 +823,28 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffers) { struct i915_context *i915 = i915_context(pipe); - /* Because we change state before the draw_set_vertex_buffers call - * we need a flush here, just to be sure. - */ - draw_flush(i915->draw); + struct draw_context *draw = i915->draw; + int i; - memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); - i915->num_vertex_buffers = count; + util_copy_vertex_buffers(i915->saved_vertex_buffers, + &i915->saved_nr_vertex_buffers, + buffers, count); +#if 0 + /* XXX doesn't look like this is needed */ + /* unmap old */ + for (i = 0; i < i915->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + } +#endif /* pass-through to draw module */ - draw_set_vertex_buffers(i915->draw, count, buffers); + draw_set_vertex_buffers(draw, count, buffers); + + /* map new */ + for (i = 0; i < count; i++) { + void *buf = i915_buffer(buffers[i].buffer)->data; + draw_set_mapped_vertex_buffer(draw, i, buf); + } } static void * @@ -789,10 +869,7 @@ i915_bind_vertex_elements_state(struct pipe_context *pipe, struct i915_context *i915 = i915_context(pipe); struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems; - /* Because we change state before the draw_set_vertex_buffers call - * we need a flush here, just to be sure. - */ - draw_flush(i915->draw); + i915->saved_velems = velems; /* pass-through to draw module */ if (i915_velems) { @@ -870,4 +947,16 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; i915->base.set_index_buffer = i915_set_index_buffer; + i915->base.redefine_user_buffer = u_default_redefine_user_buffer; +} + +void +i915_init_fixup_state_functions( struct i915_context *i915 ) +{ + i915->saved_bind_fs_state = i915->base.bind_fs_state; + i915->base.bind_fs_state = i915_fixup_bind_fs_state; + i915->saved_bind_sampler_states = i915->base.bind_fragment_sampler_states; + i915->base.bind_fragment_sampler_states = i915_fixup_bind_sampler_states; + i915->saved_set_sampler_views = i915->base.set_fragment_sampler_views; + i915->base.set_fragment_sampler_views = i915_fixup_set_fragment_sampler_views; } diff --git a/src/gallium/drivers/i915/i915_state.h b/src/gallium/drivers/i915/i915_state.h index b4074dc35b..3f4e40294e 100644 --- a/src/gallium/drivers/i915/i915_state.h +++ b/src/gallium/drivers/i915/i915_state.h @@ -48,6 +48,7 @@ extern struct i915_tracked_state i915_hw_immediate; extern struct i915_tracked_state i915_hw_dynamic; extern struct i915_tracked_state i915_hw_fs; extern struct i915_tracked_state i915_hw_framebuffer; +extern struct i915_tracked_state i915_hw_dst_buf_vars; extern struct i915_tracked_state i915_hw_constants; void i915_update_derived(struct i915_context *i915); diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 1d4026a214..59ac2f7292 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -165,6 +165,7 @@ static struct i915_tracked_state *atoms[] = { &i915_hw_dynamic, &i915_hw_fs, &i915_hw_framebuffer, + &i915_hw_dst_buf_vars, &i915_hw_constants, NULL, }; diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c index d61a8c3407..204cee6fe9 100644 --- a/src/gallium/drivers/i915/i915_state_dynamic.c +++ b/src/gallium/drivers/i915/i915_state_dynamic.c @@ -46,18 +46,34 @@ * (active) state every time a 4kb boundary is crossed. */ -static INLINE void set_dynamic_indirect(struct i915_context *i915, - unsigned offset, - const unsigned *src, - unsigned dwords) +static INLINE void set_dynamic(struct i915_context *i915, + unsigned offset, + const unsigned state) +{ + if (i915->current.dynamic[offset] == state) + return; + + i915->current.dynamic[offset] = state; + i915->dynamic_dirty |= 1 << offset; + i915->hardware_dirty |= I915_HW_DYNAMIC; +} + + + +static INLINE void set_dynamic_array(struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords) { unsigned i; if (!memcmp(src, &i915->current.dynamic[offset], dwords * 4)) return; - for (i = 0; i < dwords; i++) + for (i = 0; i < dwords; i++) { i915->current.dynamic[offset + i] = src[i]; + i915->dynamic_dirty |= 1 << (offset + i); + } i915->hardware_dirty |= I915_HW_DYNAMIC; } @@ -79,12 +95,7 @@ static void upload_MODES4(struct i915_context *i915) */ modes4 |= i915->blend->modes4; - /* Always, so that we know when state is in-active: - */ - set_dynamic_indirect(i915, - I915_DYNAMIC_MODES4, - &modes4, - 1); + set_dynamic(i915, I915_DYNAMIC_MODES4, modes4); } const struct i915_tracked_state i915_upload_MODES4 = { @@ -107,10 +118,7 @@ static void upload_BFO(struct i915_context *i915) bfo[0] |= i915->stencil_ref.ref_value[1] << BFO_STENCIL_REF_SHIFT; } - set_dynamic_indirect(i915, - I915_DYNAMIC_BFO_0, - &(bfo[0]), - 2); + set_dynamic_array(i915, I915_DYNAMIC_BFO_0, bfo, 2); } const struct i915_tracked_state i915_upload_BFO = { @@ -141,10 +149,7 @@ static void upload_BLENDCOLOR(struct i915_context *i915) color[3]); } - set_dynamic_indirect(i915, - I915_DYNAMIC_BC_0, - bc, - 2); + set_dynamic_array(i915, I915_DYNAMIC_BC_0, bc, 2); } const struct i915_tracked_state i915_upload_BLENDCOLOR = { @@ -161,10 +166,7 @@ static void upload_IAB(struct i915_context *i915) { unsigned iab = i915->blend->iab; - set_dynamic_indirect(i915, - I915_DYNAMIC_IAB, - &iab, - 1); + set_dynamic(i915, I915_DYNAMIC_IAB, iab); } const struct i915_tracked_state i915_upload_IAB = { @@ -179,10 +181,8 @@ const struct i915_tracked_state i915_upload_IAB = { */ static void upload_DEPTHSCALE(struct i915_context *i915) { - set_dynamic_indirect(i915, - I915_DYNAMIC_DEPTHSCALE_0, - &(i915->rasterizer->ds[0].u), - 2); + set_dynamic_array(i915, I915_DYNAMIC_DEPTHSCALE_0, + &i915->rasterizer->ds[0].u, 2); } const struct i915_tracked_state i915_upload_DEPTHSCALE = { @@ -234,10 +234,7 @@ static void upload_STIPPLE(struct i915_context *i915) (p[3] << 12)); } - set_dynamic_indirect(i915, - I915_DYNAMIC_STP_0, - &st[0], - 2); + set_dynamic_array(i915, I915_DYNAMIC_STP_0, st, 2); } const struct i915_tracked_state i915_upload_STIPPLE = { @@ -253,10 +250,7 @@ const struct i915_tracked_state i915_upload_STIPPLE = { */ static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) { - set_dynamic_indirect(i915, - I915_DYNAMIC_SC_ENA_0, - &(i915->rasterizer->sc[0]), - 1); + set_dynamic(i915, I915_DYNAMIC_SC_ENA_0, i915->rasterizer->sc[0]); } const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { @@ -282,10 +276,7 @@ static void upload_SCISSOR_RECT(struct i915_context *i915) sc[1] = (y1 << 16) | (x1 & 0xffff); sc[2] = (y2 << 16) | (x2 & 0xffff); - set_dynamic_indirect(i915, - I915_DYNAMIC_SC_RECT_0, - &sc[0], - 3); + set_dynamic_array(i915, I915_DYNAMIC_SC_RECT_0, sc, 3); } const struct i915_tracked_state i915_upload_SCISSOR_RECT = { diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index c48d53ffbb..0155cd8351 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -35,411 +35,425 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -static unsigned translate_format( enum pipe_format format ) +#include "util/u_math.h" +#include "util/u_memory.h" + +struct i915_tracked_hw_state { + const char *name; + void (*validate)(struct i915_context *, unsigned *batch_space); + void (*emit)(struct i915_context *); + unsigned dirty, batch_space; +}; + + +static void +validate_flush(struct i915_context *i915, unsigned *batch_space) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - return COLOR_BUF_ARGB8888; - case PIPE_FORMAT_B5G6R5_UNORM: - return COLOR_BUF_RGB565; - default: - assert(0); - return 0; - } + *batch_space = i915->flush_dirty ? 1 : 0; } -static unsigned translate_depth_format( enum pipe_format zformat ) +static void +emit_flush(struct i915_context *i915) { - switch (zformat) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return DEPTH_FRMT_24_FIXED_8_OTHER; - case PIPE_FORMAT_Z16_UNORM: - return DEPTH_FRMT_16_FIXED; - default: - assert(0); - return 0; - } + /* Cache handling is very cheap atm. State handling can request to flushes: + * - I915_FLUSH_CACHE which is a flush everything request and + * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush. + * Because the cache handling is so dumb, no explicit "invalidate map cache". + * Also, the first is a strict superset of the latter, so the following logic + * works. */ + if (i915->flush_dirty & I915_FLUSH_CACHE) + OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE); + else if (i915->flush_dirty & I915_PIPELINE_FLUSH) + OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); } +uint32_t invariant_state[] = { + _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0, -/** - * Examine framebuffer state to determine width, height. - */ -static boolean -framebuffer_size(const struct pipe_framebuffer_state *fb, - uint *width, uint *height) + _3DSTATE_DFLT_DIFFUSE_CMD, 0, + + _3DSTATE_DFLT_SPEC_CMD, 0, + + _3DSTATE_DFLT_Z_CMD, 0, + + _3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7), + + _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D, + + _3DSTATE_DEPTH_SUBRECT_DISABLE, + + /* disable indirect state for now + */ + _3DSTATE_LOAD_INDIRECT | 0, 0}; + +static void +emit_invariant(struct i915_context *i915) { - if (fb->cbufs[0]) { - *width = fb->cbufs[0]->width; - *height = fb->cbufs[0]->height; - return TRUE; - } - else if (fb->zsbuf) { - *width = fb->zsbuf->width; - *height = fb->zsbuf->height; - return TRUE; - } - else { - *width = *height = 0; - return FALSE; - } + i915_winsys_batchbuffer_write(i915->batch, invariant_state, + Elements(invariant_state)*sizeof(uint32_t)); } -static inline uint32_t -buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) +static void +validate_immediate(struct i915_context *i915, unsigned *batch_space) { - uint32_t tiling_bits = 0; - - switch (tiling) { - case I915_TILE_Y: - tiling_bits |= BUF_3D_TILE_WALK_Y; - case I915_TILE_X: - tiling_bits |= BUF_3D_TILED_SURFACE; - case I915_TILE_NONE: - break; - } + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; - return tiling_bits; + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo) + i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo; + + *batch_space = 1 + util_bitcount(dirty); } -/* Push the state into the sarea and/or texture memory. - */ -void -i915_emit_hardware_state(struct i915_context *i915 ) +static void +emit_immediate(struct i915_context *i915) { - /* XXX: there must be an easier way */ - const unsigned dwords = ( 14 + - 7 + - I915_MAX_DYNAMIC + - 8 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_MAX_CONSTANT*4 + -#if 0 - i915->current.program_len + -#else - i915->fs->program_len + -#endif - 6 - ) * 3/2; /* plus 50% margin */ - const unsigned relocs = ( I915_TEX_UNITS + - 3 - ) * 3/2; /* plus 50% margin */ + /* remove unwatned bits and S7 */ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + int i, num = util_bitcount(dirty); + assert(num && num <= I915_MAX_IMMEDIATE); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + dirty << 4 | (num - 1)); + + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { + if (i915->vbo) + OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + OUT_BATCH(0); + } - uintptr_t save_ptr; - size_t save_relocs; + for (i = 1; i < I915_MAX_IMMEDIATE; i++) { + if (dirty & (1 << i)) + OUT_BATCH(i915->current.immediate[i]); + } +} - if (I915_DBG_ON(DBG_ATOMS)) - i915_dump_hardware_dirty(i915, __FUNCTION__); +static void +validate_dynamic(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1)); +} - if(!BEGIN_BATCH(dwords, relocs)) { - FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(dwords, relocs)); +static void +emit_dynamic(struct i915_context *i915) +{ + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + if (i915->dynamic_dirty & (1 << i)) + OUT_BATCH(i915->current.dynamic[i]); } +} - save_ptr = (uintptr_t)i915->batch->ptr; - save_relocs = i915->batch->relocs; - - /* 14 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_INVARIENT) - { - OUT_BATCH(_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); - - OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_Z_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | - CSB_TCB(0, 0) | - CSB_TCB(1, 1) | - CSB_TCB(2, 2) | - CSB_TCB(3, 3) | - CSB_TCB(4, 4) | - CSB_TCB(5, 5) | - CSB_TCB(6, 6) | - CSB_TCB(7, 7)); - - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | - ENABLE_TEXKILL_3D_4D | - TEXKILL_4D); - - /* Need to initialize this to zero. - */ - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); - - /* disable indirect state for now - */ - OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); - OUT_BATCH(0); +static void +validate_static(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = 0; + + if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) { + i915->validation_buffers[i915->num_validation_buffers++] + = i915->current.cbuf_bo; + *batch_space += 3; } - /* 7 dwords, 1 relocs */ - if (i915->hardware_dirty & I915_HW_IMMEDIATE) - { - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | - I1_LOAD_S(1) | - I1_LOAD_S(2) | - I1_LOAD_S(4) | - I1_LOAD_S(5) | - I1_LOAD_S(6) | - (5)); - - if(i915->vbo) - OUT_RELOC(i915->vbo, - I915_USAGE_VERTEX, - i915->current.immediate[I915_IMMEDIATE_S0]); - else - /* FIXME: we should not do this */ - OUT_BATCH(0); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); - } - -#if 01 - /* I915_MAX_DYNAMIC dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_DYNAMIC) - { - int i; - for (i = 0; i < I915_MAX_DYNAMIC; i++) { - OUT_BATCH(i915->current.dynamic[i]); - } + if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) { + i915->validation_buffers[i915->num_validation_buffers++] + = i915->current.depth_bo; + *batch_space += 3; } -#endif -#if 01 - /* 8 dwords, 2 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + if (i915->static_dirty & I915_DST_VARS) + *batch_space += 2; + + if (i915->static_dirty & I915_DST_RECT) + *batch_space += 5; +} - if (cbuf_surface) { - struct i915_texture *tex = i915_texture(cbuf_surface->texture); - assert(tex); +static void +emit_static(struct i915_context *i915) +{ + if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.cbuf_flags); + OUT_RELOC(i915->current.cbuf_bo, + I915_USAGE_RENDER, + 0); + } - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + /* What happens if no zbuf?? + */ + if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.depth_flags); + OUT_RELOC(i915->current.depth_bo, + I915_USAGE_RENDER, + 0); + } - OUT_BATCH(BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - buf_3d_tiling_bits(tex->tiling)); + if (i915->static_dirty & I915_DST_VARS) { + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(i915->current.dst_buf_vars); + } +} - OUT_RELOC(tex->buffer, - I915_USAGE_RENDER, - 0); - } +static void +validate_map(struct i915_context *i915, unsigned *batch_space) +{ + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + struct i915_texture *tex; - /* What happens if no zbuf?? - */ - if (depth_surface) { - struct i915_texture *tex = i915_texture(depth_surface->texture); - unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level, - depth_surface->u.tex.first_layer); - assert(tex); - assert(offset == 0); - - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - - assert(tex); - OUT_BATCH(BUF_3D_ID_DEPTH | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - buf_3d_tiling_bits(tex->tiling)); - - OUT_RELOC(tex->buffer, - I915_USAGE_RENDER, - 0); - } + *batch_space = i915->current.sampler_enable_nr ? + 2 + 3*i915->current.sampler_enable_nr : 0; - { - unsigned cformat, zformat = 0; - - if (cbuf_surface) - cformat = cbuf_surface->format; - else - cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ - cformat = translate_format(cformat); - - if (depth_surface) - zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); - - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - cformat | - zformat ); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + tex = i915_texture(i915->fragment_sampler_views[unit]->texture); + i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; } } -#endif +} -#if 01 - /* texture images */ - /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ - if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) - { - const uint nr = i915->current.sampler_enable_nr; - if (nr) { - const uint enabled = i915->current.sampler_enable_flags; - uint unit; - uint count = 0; - OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); - OUT_BATCH(enabled); - for (unit = 0; unit < I915_TEX_UNITS; unit++) { - if (enabled & (1 << unit)) { - struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - struct i915_winsys_buffer *buf = texture->buffer; - assert(buf); - - count++; - - OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); - OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ - OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ - } - } - assert(count == nr); +static void +emit_map(struct i915_context *i915) +{ + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); + struct i915_winsys_buffer *buf = texture->buffer; + assert(buf); + + count++; + + OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ } } -#endif + assert(count == nr); + } +} + +static void +validate_sampler(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->current.sampler_enable_nr ? + 2 + 3*i915->current.sampler_enable_nr : 0; +} -#if 01 - /* samplers */ - /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_SAMPLER) - { - if (i915->current.sampler_enable_nr) { - int i; - - OUT_BATCH( _3DSTATE_SAMPLER_STATE | - (3 * i915->current.sampler_enable_nr) ); - - OUT_BATCH( i915->current.sampler_enable_flags ); - - for (i = 0; i < I915_TEX_UNITS; i++) { - if (i915->current.sampler_enable_flags & (1<<i)) { - OUT_BATCH( i915->current.sampler[i][0] ); - OUT_BATCH( i915->current.sampler[i][1] ); - OUT_BATCH( i915->current.sampler[i][2] ); - } +static void +emit_sampler(struct i915_context *i915) +{ + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<<i)) { + OUT_BATCH( i915->current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); } } } -#endif +} + +static void +validate_constants(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->fs->num_constants ? + 2 + 4*i915->fs->num_constants : 0; +} + +static void +emit_constants(struct i915_context *i915) +{ + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + if (nr) { + uint i; + + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH((1 << nr) - 1); -#if 01 - /* constants */ - /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_CONSTANTS) - { - /* Collate the user-defined constants with the fragment shader's - * immediates according to the constant_flags[] array. - */ - const uint nr = i915->fs->num_constants; - if (nr) { - uint i; - - OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); - OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); - - for (i = 0; i < nr; i++) { - const uint *c; - if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { - /* grab user-defined constant */ - c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; - } - else { - /* emit program constant */ - c = (uint *) i915->fs->constants[i]; - } + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data; + c += 4 * i; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } #if 0 /* debug */ - { - float *f = (float *) c; - printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], - (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER - ? "user" : "immediate")); - } -#endif - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? "user" : "immediate")); } +#endif + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); } } -#endif +} -#if 01 - /* Fragment program */ - /* i915->current.program_len dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_PROGRAM) - { +static void +validate_program(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->fs->program_len; +} + +static void +emit_program(struct i915_context *i915) +{ uint i; /* we should always have, at least, a pass-through program */ assert(i915->fs->program_len > 0); for (i = 0; i < i915->fs->program_len; i++) { OUT_BATCH(i915->fs->program[i]); } +} + +static void +emit_draw_rect(struct i915_context *i915) +{ + if (i915->static_dirty & I915_DST_RECT) { + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); + OUT_BATCH(i915->current.draw_offset); + OUT_BATCH(i915->current.draw_size); + OUT_BATCH(i915->current.draw_offset); } -#endif +} -#if 01 - /* drawing surface size */ - /* 6 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - uint w, h; - struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - struct i915_texture *tex = i915_texture(cbuf_surface->texture); - unsigned x, y; - int layer; - uint32_t draw_offset; - boolean ret; +static boolean +i915_validate_state(struct i915_context *i915, unsigned *batch_space) +{ + unsigned tmp; + + i915->num_validation_buffers = 0; + if (i915->hardware_dirty & I915_HW_INVARIANT) + *batch_space = Elements(invariant_state); + else + *batch_space = 0; + +#define VALIDATE_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) { \ + validate_##atom(i915, &tmp); \ + *batch_space += tmp; } + VALIDATE_ATOM(flush, I915_HW_FLUSH); + VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE); + VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC); + VALIDATE_ATOM(static, I915_HW_STATIC); + VALIDATE_ATOM(map, I915_HW_MAP); + VALIDATE_ATOM(sampler, I915_HW_SAMPLER); + VALIDATE_ATOM(constants, I915_HW_CONSTANTS); + VALIDATE_ATOM(program, I915_HW_PROGRAM); +#undef VALIDATE_ATOM + + if (i915->num_validation_buffers == 0) + return TRUE; - ret = framebuffer_size(&i915->framebuffer, &w, &h); - assert(ret); + if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers, + i915->num_validation_buffers)) + return FALSE; - layer = cbuf_surface->u.tex.first_layer; + return TRUE; +} - x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx; - y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; +/* Push the state into the sarea and/or texture memory. + */ +void +i915_emit_hardware_state(struct i915_context *i915 ) +{ + unsigned batch_space; + uintptr_t save_ptr; - draw_offset = x | (y << 16); + assert(i915->dirty == 0); - /* XXX flush only required when the draw_offset changes! */ - OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); - OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); - OUT_BATCH(draw_offset); - OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16)); - OUT_BATCH(draw_offset); + if (I915_DBG_ON(DBG_ATOMS)) + i915_dump_hardware_dirty(i915, __FUNCTION__); + + if (!i915_validate_state(i915, &batch_space)) { + FLUSH_BATCH(NULL); + assert(i915_validate_state(i915, &batch_space)); } -#endif - I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__, + if(!BEGIN_BATCH(batch_space)) { + FLUSH_BATCH(NULL); + assert(i915_validate_state(i915, &batch_space)); + assert(BEGIN_BATCH(batch_space)); + } + + save_ptr = (uintptr_t)i915->batch->ptr; + +#define EMIT_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) \ + emit_##atom(i915); + EMIT_ATOM(flush, I915_HW_FLUSH); + EMIT_ATOM(invariant, I915_HW_INVARIANT); + EMIT_ATOM(immediate, I915_HW_IMMEDIATE); + EMIT_ATOM(dynamic, I915_HW_DYNAMIC); + EMIT_ATOM(static, I915_HW_STATIC); + EMIT_ATOM(map, I915_HW_MAP); + EMIT_ATOM(sampler, I915_HW_SAMPLER); + EMIT_ATOM(constants, I915_HW_CONSTANTS); + EMIT_ATOM(program, I915_HW_PROGRAM); + EMIT_ATOM(draw_rect, I915_HW_STATIC); +#undef EMIT_ATOM + + I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__, ((uintptr_t)i915->batch->ptr - save_ptr) / 4, - i915->batch->relocs - save_relocs); + batch_space); + assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space); i915->hardware_dirty = 0; + i915->immediate_dirty = 0; + i915->dynamic_dirty = 0; + i915->static_dirty = 0; + i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_fpc.c b/src/gallium/drivers/i915/i915_state_fpc.c index ec7cec0e47..1959a24691 100644 --- a/src/gallium/drivers/i915/i915_state_fpc.c +++ b/src/gallium/drivers/i915/i915_state_fpc.c @@ -40,7 +40,7 @@ static void update_hw_constants(struct i915_context *i915) struct i915_tracked_state i915_hw_constants = { "hw_constants", update_hw_constants, - I915_NEW_CONSTANTS | I915_NEW_FS + I915_NEW_FS_CONSTANTS | I915_NEW_FS }; diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index f9ade7077f..8134864739 100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -36,12 +36,20 @@ #include "util/u_memory.h" -/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. - * Would like to opportunistically recombine all these fragments into - * a single packet containing only what has changed, but for now emit - * as multiple packets. +/* Convinience function to check immediate state. */ +static INLINE void set_immediate(struct i915_context *i915, + unsigned offset, + const unsigned state) +{ + if (i915->current.immediate[offset] == state) + return; + + i915->current.immediate[offset] = state; + i915->immediate_dirty |= 1 << offset; + i915->hardware_dirty |= I915_HW_IMMEDIATE; +} @@ -56,9 +64,14 @@ static void upload_S0S1(struct i915_context *i915) */ LIS0 = i915->vbo_offset; + /* Need to force this */ + if (i915->dirty & I915_NEW_VBO) { + i915->immediate_dirty |= 1 << I915_IMMEDIATE_S0; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } + /* I915_NEW_VERTEX_SIZE */ - /* XXX do this where the vertex size is calculated! */ { unsigned vertex_size = i915->current.vertex_info.size; @@ -66,16 +79,8 @@ static void upload_S0S1(struct i915_context *i915) (vertex_size << 16)); } - /* I915_NEW_VBO - */ - if (1 || - i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || - i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) - { - i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; - i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S0, LIS0); + set_immediate(i915, I915_IMMEDIATE_S1, LIS1); } const struct i915_tracked_state i915_upload_S0S1 = { @@ -98,21 +103,13 @@ static void upload_S2S4(struct i915_context *i915) { LIS2 = i915->current.vertex_info.hwfmt[1]; LIS4 = i915->current.vertex_info.hwfmt[0]; - /* - debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); - */ assert(LIS4); /* should never be zero? */ } LIS4 |= i915->rasterizer->LIS4; - if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || - LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { - - i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; - i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S2, LIS2); + set_immediate(i915, I915_IMMEDIATE_S4, LIS4); } const struct i915_tracked_state i915_upload_S2S4 = { @@ -142,15 +139,12 @@ static void upload_S5(struct i915_context *i915) #if 0 /* I915_NEW_RASTERIZER */ - if (i915->state.Polygon->OffsetFill) { + if (i915->rasterizer->LIS7) { LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; } #endif - if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { - i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S5, LIS5); } const struct i915_tracked_state i915_upload_S5 = { @@ -180,14 +174,11 @@ static void upload_S6(struct i915_context *i915) */ LIS6 |= i915->depth_stencil->depth_LIS6; - if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { - i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S6, LIS6); } const struct i915_tracked_state i915_upload_S6 = { - "imm s6", + "imm S6", upload_S6, I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER }; @@ -204,10 +195,9 @@ static void upload_S7(struct i915_context *i915) */ LIS7 = i915->rasterizer->LIS7; - if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { - i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } +#if 0 + set_immediate(i915, I915_IMMEDIATE_S7, LIS7); +#endif } const struct i915_tracked_state i915_upload_S7 = { diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index dc9a4c1e2f..2865298318 100644 --- a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -27,17 +27,120 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_state.h" +#include "i915_resource.h" +#include "i915_screen.h" /*********************************************************************** * Update framebuffer state */ +static unsigned translate_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_B5G6R5_UNORM: + return COLOR_BUF_RGB565; + default: + assert(0); + return 0; + } +} + +static unsigned translate_depth_format(enum pipe_format zformat) +{ + switch (zformat) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return DEPTH_FRMT_24_FIXED_8_OTHER; + case PIPE_FORMAT_Z16_UNORM: + return DEPTH_FRMT_16_FIXED; + default: + assert(0); + return 0; + } +} + +static inline uint32_t +buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) +{ + uint32_t tiling_bits = 0; + + switch (tiling) { + case I915_TILE_Y: + tiling_bits |= BUF_3D_TILE_WALK_Y; + case I915_TILE_X: + tiling_bits |= BUF_3D_TILED_SURFACE; + case I915_TILE_NONE: + break; + } + + return tiling_bits; +} + static void update_framebuffer(struct i915_context *i915) { - /* HW emit currently references framebuffer state directly: + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + unsigned x, y; + int layer; + uint32_t draw_offset, draw_size; + + if (cbuf_surface) { + struct i915_texture *tex = i915_texture(cbuf_surface->texture); + assert(tex); + + i915->current.cbuf_bo = tex->buffer; + i915->current.cbuf_flags = BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ + buf_3d_tiling_bits(tex->tiling); + + layer = cbuf_surface->u.tex.first_layer; + + x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx; + y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; + } else { + i915->current.cbuf_bo = NULL; + x = y = 0; + } + i915->static_dirty |= I915_DST_BUF_COLOR; + + /* What happens if no zbuf?? */ + if (depth_surface) { + struct i915_texture *tex = i915_texture(depth_surface->texture); + unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level, + depth_surface->u.tex.first_layer); + assert(tex); + assert(offset == 0); + + i915->current.depth_bo = tex->buffer; + i915->current.depth_flags = BUF_3D_ID_DEPTH | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ + buf_3d_tiling_bits(tex->tiling); + } else + i915->current.depth_bo = NULL; + i915->static_dirty |= I915_DST_BUF_DEPTH; + + /* drawing rect calculations */ + draw_offset = x | (y << 16); + draw_size = (i915->framebuffer.width - 1 + x) | + ((i915->framebuffer.height - 1 + y) << 16); + if (i915->current.draw_offset != draw_offset) { + i915->current.draw_offset = draw_offset; + i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH); + i915->static_dirty |= I915_DST_RECT; + } + if (i915->current.draw_size != draw_size) { + i915->current.draw_size = draw_size; + i915->static_dirty |= I915_DST_RECT; + } + i915->hardware_dirty |= I915_HW_STATIC; + + /* flush the cache in case we sample from the old renderbuffers */ + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } struct i915_tracked_state i915_hw_framebuffer = { @@ -45,3 +148,52 @@ struct i915_tracked_state i915_hw_framebuffer = { update_framebuffer, I915_NEW_FRAMEBUFFER }; + +static void update_dst_buf_vars(struct i915_context *i915) +{ + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + uint32_t dst_buf_vars, cformat, zformat; + uint32_t early_z = 0; + + if (cbuf_surface) + cformat = cbuf_surface->format; + else + cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ + cformat = translate_format(cformat); + + if (depth_surface) { + struct i915_texture *tex = i915_texture(depth_surface->texture); + struct i915_screen *is = i915_screen(i915->base.screen); + + zformat = translate_depth_format(depth_surface->format); + + if (is->is_i945 && tex->tiling != I915_TILE_NONE + && !i915->fs->info.writes_z) + early_z = CLASSIC_EARLY_DEPTH; + } else + zformat = 0; + + dst_buf_vars = DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + cformat | + zformat | + early_z; + + if (i915->current.dst_buf_vars != dst_buf_vars) { + if (early_z != (i915->current.dst_buf_vars & CLASSIC_EARLY_DEPTH)) + i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH); + + i915->current.dst_buf_vars = dst_buf_vars; + i915->static_dirty |= I915_DST_VARS; + i915->hardware_dirty |= I915_HW_STATIC; + } +} + +struct i915_tracked_state i915_hw_dst_buf_vars = { + "dst buf vars", + update_dst_buf_vars, + I915_NEW_FRAMEBUFFER | I915_NEW_FS +}; diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index becc6e93c2..d02c420f6c 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -27,6 +27,7 @@ #include "i915_surface.h" #include "i915_resource.h" +#include "i915_state.h" #include "i915_blit.h" #include "i915_reg.h" #include "i915_screen.h" @@ -37,16 +38,119 @@ #include "util/u_memory.h" #include "util/u_pack_color.h" +/* + * surface functions using the render engine + */ + +static void +i915_surface_copy_render(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct i915_context *i915 = i915_context(pipe); + + util_blitter_save_blend(i915->blitter, (void *)i915->blend); + util_blitter_save_depth_stencil_alpha(i915->blitter, (void *)i915->depth_stencil); + util_blitter_save_stencil_ref(i915->blitter, &i915->stencil_ref); + util_blitter_save_rasterizer(i915->blitter, (void *)i915->rasterizer); + util_blitter_save_fragment_shader(i915->blitter, i915->saved_fs); + util_blitter_save_vertex_shader(i915->blitter, i915->saved_vs); + util_blitter_save_viewport(i915->blitter, &i915->viewport); + util_blitter_save_clip(i915->blitter, &i915->saved_clip); + util_blitter_save_vertex_elements(i915->blitter, i915->saved_velems); + util_blitter_save_vertex_buffers(i915->blitter, i915->saved_nr_vertex_buffers, + i915->saved_vertex_buffers); + + util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer); + + util_blitter_save_fragment_sampler_states(i915->blitter, + i915->saved_nr_samplers, + i915->saved_samplers); + util_blitter_save_fragment_sampler_views(i915->blitter, + i915->saved_nr_sampler_views, + i915->saved_sampler_views); + + util_blitter_copy_region(i915->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); +} + +static void +i915_clear_render_target_render(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_framebuffer_state fb_state; + + util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer); + + fb_state.width = dst->width; + fb_state.height = dst->height; + fb_state.nr_cbufs = 1; + fb_state.cbufs[0] = dst; + fb_state.zsbuf = NULL; + pipe->set_framebuffer_state(pipe, &fb_state); + + if (i915->dirty) + i915_update_derived(i915); + + i915_clear_emit(pipe, PIPE_CLEAR_COLOR, rgba, 0.0, 0x0, + dstx, dsty, width, height); + + pipe->set_framebuffer_state(pipe, &i915->blitter->saved_fb_state); + util_unreference_framebuffer_state(&i915->blitter->saved_fb_state); + i915->blitter->saved_fb_state.nr_cbufs = ~0; +} + +static void +i915_clear_depth_stencil_render(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_framebuffer_state fb_state; + + util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer); + + fb_state.width = dst->width; + fb_state.height = dst->height; + fb_state.nr_cbufs = 0; + fb_state.zsbuf = dst; + pipe->set_framebuffer_state(pipe, &fb_state); + + if (i915->dirty) + i915_update_derived(i915); + + i915_clear_emit(pipe, clear_flags & PIPE_CLEAR_DEPTHSTENCIL, + NULL, depth, stencil, + dstx, dsty, width, height); + + pipe->set_framebuffer_state(pipe, &i915->blitter->saved_fb_state); + util_unreference_framebuffer_state(&i915->blitter->saved_fb_state); + i915->blitter->saved_fb_state.nr_cbufs = ~0; +} + +/* + * surface functions using the blitter + */ /* Assumes all values are within bounds -- no checking at this level - * do it higher up if required. */ static void -i915_surface_copy(struct pipe_context *pipe, - struct pipe_resource *dst, unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, unsigned src_level, - const struct pipe_box *src_box) +i915_surface_copy_blitter(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) { struct i915_texture *dst_tex = i915_texture(dst); struct i915_texture *src_tex = i915_texture(src); @@ -66,7 +170,6 @@ i915_surface_copy(struct pipe_context *pipe, assert(src_box->z == 0); src_offset = i915_texture_offset(src_tex, src_level, src_box->z); - assert( dst != src ); assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) ); assert( util_format_get_blockwidth(dpt->format) == util_format_get_blockwidth(spt->format) ); assert( util_format_get_blockheight(dpt->format) == util_format_get_blockheight(spt->format) ); @@ -81,13 +184,12 @@ i915_surface_copy(struct pipe_context *pipe, (short) src_box->width, (short) src_box->height ); } - static void -i915_clear_render_target(struct pipe_context *pipe, - struct pipe_surface *dst, - const float *rgba, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) +i915_clear_render_target_blitter(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) { struct i915_texture *tex = i915_texture(dst->texture); struct pipe_resource *pt = &tex->b.b; @@ -109,13 +211,13 @@ i915_clear_render_target(struct pipe_context *pipe, } static void -i915_clear_depth_stencil(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) +i915_clear_depth_stencil_blitter(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) { struct i915_texture *tex = i915_texture(dst->texture); struct pipe_resource *pt = &tex->b.b; @@ -193,9 +295,15 @@ i915_surface_destroy(struct pipe_context *ctx, void i915_init_surface_functions(struct i915_context *i915) { - i915->base.resource_copy_region = i915_surface_copy; - i915->base.clear_render_target = i915_clear_render_target; - i915->base.clear_depth_stencil = i915_clear_depth_stencil; + if (i915_screen(i915->base.screen)->debug.use_blitter) { + i915->base.resource_copy_region = i915_surface_copy_blitter; + i915->base.clear_render_target = i915_clear_render_target_blitter; + i915->base.clear_depth_stencil = i915_clear_depth_stencil_blitter; + } else { + i915->base.resource_copy_region = i915_surface_copy_render; + i915->base.clear_render_target = i915_clear_render_target_render; + i915->base.clear_depth_stencil = i915_clear_depth_stencil_render; + } i915->base.create_surface = i915_create_surface; i915->base.surface_destroy = i915_surface_destroy; } diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 24ea416f01..21cfdc9613 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -76,7 +76,6 @@ struct i915_winsys_batchbuffer { size_t size; size_t relocs; - size_t max_relocs; /*@}*/ }; @@ -95,6 +94,18 @@ struct i915_winsys { (*batchbuffer_create)(struct i915_winsys *iws); /** + * Validate buffers for usage in this batchbuffer. + * Does space-checking and asorted other book-keeping. + * + * @batch + * @buffers array to buffers to validate + * @num_of_buffers size of the passed array + */ + boolean (*validate_buffers)(struct i915_winsys_batchbuffer *batch, + struct i915_winsys_buffer **buffers, + int num_of_buffers); + + /** * Emit a relocation to a buffer. * Target position in batchbuffer is the same as ptr. * @@ -103,11 +114,12 @@ struct i915_winsys { * @usage how is the hardware going to use the buffer. * @offset add this to the reloc buffers address * @target buffer where to write the address, null for batchbuffer. + * @fenced relocation needs a fence. */ int (*batchbuffer_reloc)(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *reloc, enum i915_winsys_buffer_usage usage, - unsigned offset, bool fenced); + unsigned offset, boolean fenced); /** * Flush a bufferbatch. |