diff options
Diffstat (limited to 'src/gallium/drivers')
359 files changed, 37734 insertions, 16293 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index b6b3a700cd..58e647a39f 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -37,6 +37,7 @@ #include "pipe/p_format.h" #include "util/u_memory.h" #include "pipe/p_screen.h" +#include "util/u_inlines.h" #include "draw/draw_context.h" #include "draw/draw_private.h" @@ -61,6 +62,11 @@ static void cell_destroy_context( struct pipe_context *pipe ) { struct cell_context *cell = cell_context(pipe); + unsigned i; + + for (i = 0; i < cell->num_vertex_buffers; i++) { + pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL); + } util_delete_keymap(cell->fragment_ops_cache, NULL); @@ -98,18 +104,6 @@ static const struct debug_named_value cell_debug_flags[] = { DEBUG_NAMED_VALUE_END }; -static unsigned int -cell_is_resource_referenced( struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned level, int layer) -{ - /** - * FIXME: Optimize. 
- */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_context * cell_create_context(struct pipe_screen *screen, @@ -134,8 +128,6 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.clear = cell_clear; cell->pipe.flush = cell_flush; - cell->pipe.is_resource_referenced = cell_is_resource_referenced; - #if 0 cell->pipe.begin_query = cell_begin_query; cell->pipe.end_query = cell_end_query; diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c index e7c9fc46d9..181fef44f4 100644 --- a/src/gallium/drivers/cell/ppu/cell_fence.c +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -59,9 +59,10 @@ cell_fence_signalled(const struct cell_context *cell, } -void +boolean cell_fence_finish(const struct cell_context *cell, - const struct cell_fence *fence) + const struct cell_fence *fence, + uint64_t timeout) { while (!cell_fence_signalled(cell, fence)) { usleep(10); @@ -75,6 +76,7 @@ cell_fence_finish(const struct cell_context *cell, } } #endif + return TRUE; } diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h index 536b4ba411..3568230b1c 100644 --- a/src/gallium/drivers/cell/ppu/cell_fence.h +++ b/src/gallium/drivers/cell/ppu/cell_fence.h @@ -36,12 +36,15 @@ cell_fence_init(struct cell_fence *fence); extern boolean cell_fence_signalled(const struct cell_context *cell, - const struct cell_fence *fence); + const struct cell_fence *fence, + unsigned flags); -extern void +extern boolean cell_fence_finish(const struct cell_context *cell, - const struct cell_fence *fence); + const struct cell_fence *fence, + unsigned flags, + uint64_t timeout); diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index 8275c9dc9c..463f4d03eb 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -38,19 +38,16 @@ * Called via pipe->flush() */ void -cell_flush(struct 
pipe_context *pipe, unsigned flags, +cell_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence) { struct cell_context *cell = cell_context(pipe); if (fence) { *fence = NULL; - /* XXX: Implement real fencing */ - flags |= CELL_FLUSH_WAIT; } - if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE)) - flags |= CELL_FLUSH_WAIT; + flags |= CELL_FLUSH_WAIT; draw_flush( cell->draw ); cell_flush_int(cell, flags); diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 8d2b4b9643..0ee124a24f 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -149,8 +149,7 @@ cell_is_format_supported( struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags ) + unsigned tex_usage) { struct sw_winsys *winsys = cell_screen(screen)->winsys; diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index a065d68b5a..7f65b82619 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -33,6 +33,7 @@ #include "cell_state.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "draw/draw_context.h" @@ -82,8 +83,9 @@ cell_set_vertex_buffers(struct pipe_context *pipe, assert(count <= PIPE_MAX_ATTRIBS); - memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0])); - cell->num_vertex_buffers = count; + util_copy_vertex_buffers(cell->vertex_buffer, + &cell->num_vertex_buffers, + buffers, count); cell->dirty |= CELL_NEW_VERTEX; @@ -114,4 +116,5 @@ cell_init_vertex_functions(struct cell_context *cell) cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; + 
cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index e4d289c8a4..0fefec9aae 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -29,6 +29,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "pipe/p_context.h" +#include "util/u_inlines.h" #include "fo_context.h" #include "fo_winsys.h" @@ -38,6 +39,11 @@ static void failover_destroy( struct pipe_context *pipe ) { struct failover_context *failover = failover_context( pipe ); + unsigned i; + + for (i = 0; i < failover->num_vertex_buffers; i++) { + pipe_resource_reference(&failover->vertex_buffers[i].buffer, NULL); + } FREE( failover ); } @@ -73,7 +79,7 @@ static void failover_draw_vbo( struct pipe_context *pipe, if (failover->mode == FO_SW) { if (failover->dirty) { - failover->hw->flush( failover->hw, ~0, NULL ); + failover->hw->flush( failover->hw, NULL ); failover_state_emit( failover ); } @@ -83,22 +89,10 @@ static void failover_draw_vbo( struct pipe_context *pipe, * intervening flush. Unlikely to be much performance impact to * this: */ - failover->sw->flush( failover->sw, ~0, NULL ); + failover->sw->flush( failover->sw, NULL ); } } -static unsigned int -failover_is_resource_referenced( struct pipe_context *_pipe, - struct pipe_resource *resource, - unsigned level, int layer) -{ - struct failover_context *failover = failover_context( _pipe ); - struct pipe_context *pipe = (failover->mode == FO_HW) ? 
- failover->hw : failover->sw; - - return pipe->is_resource_referenced(pipe, resource, level, layer); -} - struct pipe_context *failover_create( struct pipe_context *hw, struct pipe_context *sw ) { @@ -144,7 +138,6 @@ struct pipe_context *failover_create( struct pipe_context *hw, #endif failover->pipe.flush = hw->flush; - failover->pipe.is_resource_referenced = failover_is_resource_referenced; failover->dirty = 0; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index c265f381b6..b4da1b8b90 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -30,6 +30,7 @@ #include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "fo_context.h" @@ -574,10 +575,10 @@ failover_set_vertex_buffers(struct pipe_context *pipe, { struct failover_context *failover = failover_context(pipe); - memcpy(failover->vertex_buffers, vertex_buffers, - count * sizeof(vertex_buffers[0])); + util_copy_vertex_buffers(failover->vertex_buffers, + &failover->num_vertex_buffers, + vertex_buffers, count); failover->dirty |= FO_NEW_VERTEX_BUFFER; - failover->num_vertex_buffers = count; failover->sw->set_vertex_buffers( failover->sw, count, vertex_buffers ); failover->hw->set_vertex_buffers( failover->hw, count, vertex_buffers ); } @@ -656,4 +657,5 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_constant_buffer = failover_set_constant_buffer; failover->pipe.create_sampler_view = failover_create_sampler_view; failover->pipe.sampler_view_destroy = failover_sampler_view_destroy; + failover->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index a572ad22bd..813a21e2ee 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -381,6 +381,8 @@ galahad_create_vertex_elements_state(struct 
pipe_context *_pipe, struct galahad_context *glhd_pipe = galahad_context(_pipe); struct pipe_context *pipe = glhd_pipe->pipe; + /* XXX check if stride lines up with element size, at least for floats */ + return pipe->create_vertex_elements_state(pipe, num_elements, vertex_elements); @@ -759,34 +761,15 @@ galahad_clear_depth_stencil(struct pipe_context *_pipe, static void galahad_flush(struct pipe_context *_pipe, - unsigned flags, struct pipe_fence_handle **fence) { struct galahad_context *glhd_pipe = galahad_context(_pipe); struct pipe_context *pipe = glhd_pipe->pipe; pipe->flush(pipe, - flags, fence); } -static unsigned int -galahad_is_resource_referenced(struct pipe_context *_pipe, - struct pipe_resource *_resource, - unsigned level, - int layer) -{ - struct galahad_context *glhd_pipe = galahad_context(_pipe); - struct galahad_resource *glhd_resource = galahad_resource(_resource); - struct pipe_context *pipe = glhd_pipe->pipe; - struct pipe_resource *resource = glhd_resource->resource; - - return pipe->is_resource_referenced(pipe, - resource, - level, - layer); -} - static struct pipe_sampler_view * galahad_context_create_sampler_view(struct pipe_context *_pipe, struct pipe_resource *_resource, @@ -960,6 +943,19 @@ galahad_context_transfer_inline_write(struct pipe_context *_context, } +static void galahad_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *context = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + struct pipe_context * galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -1023,7 +1019,6 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) 
glhd_pipe->base.clear_render_target = galahad_clear_render_target; glhd_pipe->base.clear_depth_stencil = galahad_clear_depth_stencil; glhd_pipe->base.flush = galahad_flush; - glhd_pipe->base.is_resource_referenced = galahad_is_resource_referenced; glhd_pipe->base.create_sampler_view = galahad_context_create_sampler_view; glhd_pipe->base.sampler_view_destroy = galahad_context_sampler_view_destroy; glhd_pipe->base.create_surface = galahad_context_create_surface; @@ -1034,6 +1029,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.transfer_unmap = galahad_context_transfer_unmap; glhd_pipe->base.transfer_flush_region = galahad_context_transfer_flush_region; glhd_pipe->base.transfer_inline_write = galahad_context_transfer_inline_write; + glhd_pipe->base.redefine_user_buffer = galahad_redefine_user_buffer; glhd_pipe->pipe = pipe; diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c index b4825bef66..b4edebe492 100644 --- a/src/gallium/drivers/galahad/glhd_screen.c +++ b/src/gallium/drivers/galahad/glhd_screen.c @@ -106,8 +106,7 @@ galahad_screen_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { struct galahad_screen *glhd_screen = galahad_screen(_screen); struct pipe_screen *screen = glhd_screen->screen; @@ -120,8 +119,7 @@ galahad_screen_is_format_supported(struct pipe_screen *_screen, format, target, sample_count, - tex_usage, - geom_flags); + tex_usage); } static struct pipe_context * @@ -276,30 +274,28 @@ galahad_screen_fence_reference(struct pipe_screen *_screen, fence); } -static int +static boolean galahad_screen_fence_signalled(struct pipe_screen *_screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct galahad_screen *glhd_screen = galahad_screen(_screen); struct pipe_screen 
*screen = glhd_screen->screen; return screen->fence_signalled(screen, - fence, - flags); + fence); } -static int +static boolean galahad_screen_fence_finish(struct pipe_screen *_screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct galahad_screen *glhd_screen = galahad_screen(_screen); struct pipe_screen *screen = glhd_screen->screen; return screen->fence_finish(screen, fence, - flags); + timeout); } struct pipe_screen * diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO index 94c428bebf..fba180064c 100644 --- a/src/gallium/drivers/i915/TODO +++ b/src/gallium/drivers/i915/TODO @@ -1,25 +1,30 @@ Random list of problems with i915g: +- Check if PIPE_CAP_BLEND_EQUATION_SEPARATE work, the code is there. + If not fix it! A simple task, good for beginners. + +- Add support for PIPE_CAP_POINT_SPRITE either via the draw module or directly + via the hardware, look at the classic driver, more advanced. + +- What does this button do? Figure out LIS7 with regards to depth offset. + - Dies with BadDrawable on GLXFBconfig changes/destruction. Makes piglit totally unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still broken, it doesn't update the viewport/get new buffers. -- Tends to hang the chip after a few minutes of openarena. Looks tiling related, - at the last frame rendered has tiling corruption over the complete frame. - - Kills the chip in 3D_PRIMITIVE LINELIST with mesa-demos/fbotexture in - wireframe mode. - -- Tiling is funny: If unlucky, it renders/samples all black. No clue yet what's - going on. Seems to depend on tiny details like whethever the sampler - relocation is fenced/unfenced (broken _with_ fenced reloc using tiling bits!). + wireframe mode. Changing the cullmode to cw from none mitigates the crash. As + does emitting only one line segment (2 indices) per 3D_PRIMITIVE command in + the batch. - Y-tiling is even more fun. i915c doesn't use it, maybe there's a reason? 
Texture sampling from Y-tiled buffers seems to work, though (save above problems). + RESOLVED: Y-tiling works with the render engine, but not with the blitter. + Use u_blitter and hw clears (PRIM3D_CLEAR_RECT). -- Need to validate buffers before usage. Currently do_exec on the batchbuffer - can fail with -ENOSPC. +- src/xvmc/i915_structs.h in xf86-video-intel has a few more bits of various + commands defined. Scavenge them and see what's useful. Other bugs can be found here: https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index 6e93da7620..ce2691b2fd 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -31,12 +31,15 @@ #include "i915_batchbuffer.h" -#define BEGIN_BATCH(dwords, relocs) \ - (i915_winsys_batchbuffer_check(i915->batch, dwords, relocs)) +#define BEGIN_BATCH(dwords) \ + (i915_winsys_batchbuffer_check(i915->batch, dwords)) #define OUT_BATCH(dword) \ i915_winsys_batchbuffer_dword(i915->batch, dword) +#define OUT_BATCH_F(f) \ + i915_winsys_batchbuffer_float(i915->batch, f) + #define OUT_RELOC(buf, usage, offset) \ i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, false) diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index d92b2ccb31..7855403478 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -41,11 +41,9 @@ i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch) static INLINE boolean i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch, - size_t dwords, - size_t relocs) + size_t dwords) { - return dwords * 4 <= i915_winsys_batchbuffer_space(batch) && - relocs <= (batch->max_relocs - batch->relocs); + return dwords * 4 <= i915_winsys_batchbuffer_space(batch); } static INLINE void @@ -57,6 
+55,16 @@ i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch, } static INLINE void +i915_winsys_batchbuffer_float(struct i915_winsys_batchbuffer *batch, + float f) +{ + union { float f; unsigned int ui; } uif; + uif.f = f; + assert (i915_winsys_batchbuffer_space(batch) >= 4); + i915_winsys_batchbuffer_dword_unchecked(batch, uif.ui); +} + +static INLINE void i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch, unsigned dword) { @@ -71,10 +79,18 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch, { assert (i915_winsys_batchbuffer_space(batch) >= size); - memcpy(data, batch->ptr, size); + memcpy(batch->ptr, data, size); batch->ptr += size; } +static INLINE boolean +i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch, + struct i915_winsys_buffer **buffers, + int num_of_buffers) +{ + return batch->iws->validate_buffers(batch, buffers, num_of_buffers); +} + static INLINE int i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *buffer, diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index 97c2566515..baaed3767f 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -49,6 +49,11 @@ i915_fill_blit(struct i915_context *i915, I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); + if(!i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)) { + FLUSH_BATCH(NULL); + assert(i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)); + } + switch (cpp) { case 1: case 2: @@ -66,9 +71,9 @@ i915_fill_blit(struct i915_context *i915, return; } - if (!BEGIN_BATCH(6, 1)) { + if (!BEGIN_BATCH(6)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(6, 1)); + assert(BEGIN_BATCH(6)); } OUT_BATCH(CMD); OUT_BATCH(BR13); @@ -76,6 +81,8 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(((y + h) << 16) | (x + w)); 
OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); OUT_BATCH(color); + + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } void @@ -94,6 +101,7 @@ i915_copy_blit(struct i915_context *i915, unsigned CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; + struct i915_winsys_buffer *buffers[2] = {src_buffer, dst_buffer}; I915_DBG(DBG_BLIT, @@ -102,6 +110,11 @@ i915_copy_blit(struct i915_context *i915, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + if(!i915_winsys_validate_buffers(i915->batch, buffers, 2)) { + FLUSH_BATCH(NULL); + assert(i915_winsys_validate_buffers(i915->batch, buffers, 2)); + } + switch (cpp) { case 1: case 2: @@ -130,9 +143,9 @@ i915_copy_blit(struct i915_context *i915, */ assert (dst_pitch > 0 && src_pitch > 0); - if (!BEGIN_BATCH(8, 2)) { + if (!BEGIN_BATCH(8)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(8, 2)); + assert(BEGIN_BATCH(8)); } OUT_BATCH(CMD); OUT_BATCH(BR13); @@ -142,4 +155,6 @@ i915_copy_blit(struct i915_context *i915, OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset); + + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index 6d824a507a..4a97746e98 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -31,17 +31,118 @@ #include "util/u_clear.h" +#include "util/u_format.h" +#include "util/u_pack_color.h" #include "i915_context.h" +#include "i915_screen.h" +#include "i915_reg.h" +#include "i915_batch.h" +#include "i915_resource.h" +#include "i915_state.h" +void +i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil, + unsigned destx, unsigned desty, unsigned width, unsigned height) +{ + struct i915_context *i915 = i915_context(pipe); + uint32_t clear_params, clear_color, clear_depth, 
clear_stencil, + clear_color8888, packed_z_stencil; + union util_color u_color; + float f_depth = depth; + struct i915_texture *cbuf_tex, *depth_tex; + + cbuf_tex = depth_tex = NULL; + clear_params = 0; + + if (buffers & PIPE_CLEAR_COLOR) { + struct pipe_surface *cbuf = i915->framebuffer.cbufs[0]; + + clear_params |= CLEARPARAM_WRITE_COLOR; + cbuf_tex = i915_texture(cbuf->texture); + util_pack_color(rgba, cbuf->format, &u_color); + if (util_format_get_blocksize(cbuf_tex->b.b.format) == 4) + clear_color = u_color.ui; + else + clear_color = (u_color.ui & 0xffff) | (u_color.ui << 16); + + util_pack_color(rgba, PIPE_FORMAT_B8G8R8A8_UNORM, &u_color); + clear_color8888 = u_color.ui; + } else + clear_color = clear_color8888 = 0; + + clear_depth = clear_stencil = 0; + if (buffers & PIPE_CLEAR_DEPTH) { + struct pipe_surface *zbuf = i915->framebuffer.zsbuf; + + clear_params |= CLEARPARAM_WRITE_DEPTH; + depth_tex = i915_texture(zbuf->texture); + packed_z_stencil = util_pack_z_stencil(depth_tex->b.b.format, depth, stencil); + + if (util_format_get_blocksize(depth_tex->b.b.format) == 4) { + /* Avoid read-modify-write if there's no stencil. 
*/ + if (buffers & PIPE_CLEAR_STENCIL + || depth_tex->b.b.format != PIPE_FORMAT_Z24_UNORM_S8_USCALED) { + clear_params |= CLEARPARAM_WRITE_STENCIL; + clear_stencil = packed_z_stencil & 0xff; + clear_depth = packed_z_stencil; + } else + clear_depth = packed_z_stencil & 0xffffff00; + } else { + clear_depth = (clear_depth & 0xffff) | (clear_depth << 16); + } + } + + if (i915->hardware_dirty) + i915_emit_hardware_state(i915); + + if (!BEGIN_BATCH(7 + 7)) { + FLUSH_BATCH(NULL); + + i915_emit_hardware_state(i915); + i915->vbo_flushed = 1; + + assert(BEGIN_BATCH(7 + 7)); + } + + OUT_BATCH(_3DSTATE_CLEAR_PARAMETERS); + OUT_BATCH(clear_params | CLEARPARAM_CLEAR_RECT); + OUT_BATCH(clear_color); + OUT_BATCH(clear_depth); + OUT_BATCH(clear_color8888); + OUT_BATCH_F(f_depth); + OUT_BATCH(clear_stencil); + + OUT_BATCH(_3DPRIMITIVE | PRIM3D_CLEAR_RECT | 5); + OUT_BATCH_F(destx + width); + OUT_BATCH_F(desty + height); + OUT_BATCH_F(destx); + OUT_BATCH_F(desty + height); + OUT_BATCH_F(destx); + OUT_BATCH_F(desty); +} /** * Clear the given buffers to the specified values. * No masking, no scissor (clear entire buffer). 
*/ void -i915_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, - double depth, unsigned stencil) +i915_clear_blitter(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil) { util_clear(pipe, &i915_context(pipe)->framebuffer, buffers, rgba, depth, stencil); } + +void +i915_clear_render(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil) +{ + struct i915_context *i915 = i915_context(pipe); + + if (i915->dirty) + i915_update_derived(i915); + + i915_clear_emit(pipe, buffers, rgba, depth, stencil, + 0, 0, i915->framebuffer.width, i915->framebuffer.height); +} diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 847dd6dd47..7a98ef73c1 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -39,6 +39,9 @@ #include "pipe/p_screen.h" +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE) + + /* * Draw functions */ @@ -50,18 +53,17 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct i915_context *i915 = i915_context(pipe); struct draw_context *draw = i915->draw; void *mapped_indices = NULL; - unsigned i; + unsigned cbuf_dirty; - if (i915->dirty) - i915_update_derived(i915); /* - * Map vertex buffers + * Ack vs contants here, helps ipers a lot. 
*/ - for (i = 0; i < i915->num_vertex_buffers; i++) { - void *buf = i915_buffer(i915->vertex_buffer[i].buffer)->data; - draw_set_mapped_vertex_buffer(draw, i, buf); - } + cbuf_dirty = i915->dirty & I915_NEW_VS_CONSTANTS; + i915->dirty &= ~I915_NEW_VS_CONSTANTS; + + if (i915->dirty) + i915_update_derived(i915); /* * Map index buffer, if present @@ -70,23 +72,21 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) mapped_indices = i915_buffer(i915->index_buffer.buffer)->data; draw_set_mapped_index_buffer(draw, mapped_indices); - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, - i915->current.constants[PIPE_SHADER_VERTEX], - (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * - 4 * sizeof(float))); + if (cbuf_dirty) { + if (i915->constants[PIPE_SHADER_VERTEX]) + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, + i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data, + (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + 4 * sizeof(float))); + else + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0); + } /* * Do the drawing */ draw_vbo(i915->draw, info); - /* - * unmap vertex/index buffers - */ - for (i = 0; i < i915->num_vertex_buffers; i++) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - if (mapped_indices) draw_set_mapped_index_buffer(draw, NULL); } @@ -103,6 +103,9 @@ static void i915_destroy(struct pipe_context *pipe) int i; draw_destroy(i915->draw); + + if (i915->blitter) + util_blitter_destroy(i915->blitter); if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); @@ -113,6 +116,11 @@ static void i915_destroy(struct pipe_context *pipe) } pipe_surface_reference(&i915->framebuffer.zsbuf, NULL); + /* unbind constant buffers */ + for (i = 0; i < PIPE_SHADER_TYPES; i++) { + pipe_resource_reference(&i915->constants[i], NULL); + } + FREE(i915); } @@ -132,16 +140,27 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->base.destroy = i915_destroy; - 
i915->base.clear = i915_clear; + if (i915_screen(screen)->debug.use_blitter) + i915->base.clear = i915_clear_blitter; + else + i915->base.clear = i915_clear_render; i915->base.draw_vbo = i915_draw_vbo; + /* init this before draw */ + util_slab_create(&i915->transfer_pool, sizeof(struct pipe_transfer), + 16, UTIL_SLAB_SINGLETHREADED); + + /* Batch stream debugging is a bit hacked up at the moment: + */ + i915->batch = i915->iws->batchbuffer_create(i915->iws); + /* * Create drawing context and plug our rendering stage into it. */ i915->draw = draw_create(&i915->base); assert(i915->draw); - if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { + if (!debug_get_option_i915_no_vbuf()) { draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); } else { draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); @@ -155,12 +174,19 @@ i915_create_context(struct pipe_screen *screen, void *priv) draw_install_aaline_stage(i915->draw, &i915->base); draw_install_aapoint_stage(i915->draw, &i915->base); + /* augmented draw pipeline clobbers state functions */ + i915_init_fixup_state_functions(i915); + + /* Create blitter last - calls state creation functions. 
*/ + i915->blitter = util_blitter_create(&i915->base); + assert(i915->blitter); + i915->dirty = ~0; i915->hardware_dirty = ~0; - - /* Batch stream debugging is a bit hacked up at the moment: - */ - i915->batch = i915->iws->batchbuffer_create(i915->iws); + i915->immediate_dirty = ~0; + i915->dynamic_dirty = ~0; + i915->static_dirty = ~0; + i915->flush_dirty = 0; return &i915->base; } diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 7103a1b8c1..dacf50e870 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -37,6 +37,9 @@ #include "tgsi/tgsi_scan.h" +#include "util/u_slab.h" +#include "util/u_blitter.h" + struct i915_winsys; struct i915_winsys_buffer; @@ -134,7 +137,6 @@ struct i915_state unsigned immediate[I915_MAX_IMMEDIATE]; unsigned dynamic[I915_MAX_DYNAMIC]; - float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; /** number of constants passed in through a constant buffer */ uint num_user_constants[PIPE_SHADER_TYPES]; @@ -149,6 +151,15 @@ struct i915_state /** Describes the current hardware vertex layout */ struct vertex_info vertex_info; + /* static state (dst/depth buffer state) */ + struct i915_winsys_buffer *cbuf_bo; + unsigned cbuf_flags; + struct i915_winsys_buffer *depth_bo; + unsigned depth_flags; + unsigned dst_buf_vars; + uint32_t draw_offset; + uint32_t draw_size; + unsigned id; /* track lost context events */ }; @@ -175,7 +186,7 @@ struct i915_rasterizer_state { unsigned LIS7; unsigned sc[1]; - const struct pipe_rasterizer_state *templ; + struct pipe_rasterizer_state templ; union { float f; unsigned u; } ds[2]; }; @@ -212,21 +223,18 @@ struct i915_context { struct pipe_blend_color blend_color; struct pipe_stencil_ref stencil_ref; struct pipe_clip_state clip; - /* XXX unneded */ struct pipe_resource *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; 
struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_index_buffer index_buffer; unsigned dirty; unsigned num_samplers; unsigned num_fragment_sampler_views; - unsigned num_vertex_buffers; struct i915_winsys_batchbuffer *batch; @@ -237,6 +245,35 @@ struct i915_context { struct i915_state current; unsigned hardware_dirty; + unsigned immediate_dirty : I915_MAX_IMMEDIATE; + unsigned dynamic_dirty : I915_MAX_DYNAMIC; + unsigned static_dirty : 4; + unsigned flush_dirty : 2; + + struct i915_winsys_buffer *validation_buffers[2 + 1 + I915_TEX_UNITS]; + int num_validation_buffers; + + struct util_slab_mempool transfer_pool; + + /** blitter/hw-clear */ + struct blitter_context* blitter; + + /** State tracking needed by u_blitter for save/restore. */ + void *saved_fs; + void (*saved_bind_fs_state)(struct pipe_context *pipe, void *shader); + void *saved_vs; + struct pipe_clip_state saved_clip; + struct i915_velems_state *saved_velems; + unsigned saved_nr_vertex_buffers; + struct pipe_vertex_buffer saved_vertex_buffers[PIPE_MAX_ATTRIBS]; + unsigned saved_nr_samplers; + void *saved_samplers[PIPE_MAX_SAMPLERS]; + void (*saved_bind_sampler_states)(struct pipe_context *pipe, + unsigned num, void **sampler); + unsigned saved_nr_sampler_views; + struct pipe_sampler_view *saved_sampler_views[PIPE_MAX_SAMPLERS]; + void (*saved_set_sampler_views)(struct pipe_context *pipe, + unsigned num, struct pipe_sampler_view **views); }; /* A flag for each state_tracker state object: @@ -253,9 +290,11 @@ struct i915_context { #define I915_NEW_DEPTH_STENCIL 0x200 #define I915_NEW_SAMPLER 0x400 #define I915_NEW_SAMPLER_VIEW 0x800 -#define I915_NEW_CONSTANTS 0x1000 -#define I915_NEW_VBO 0x2000 -#define I915_NEW_VS 0x4000 +#define I915_NEW_VS_CONSTANTS 0x1000 +#define I915_NEW_FS_CONSTANTS 0x2000 +#define I915_NEW_GS_CONSTANTS 0x4000 +#define I915_NEW_VBO 0x8000 +#define 
I915_NEW_VS 0x10000 /* Driver's internally generated state flags: @@ -272,7 +311,25 @@ struct i915_context { #define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) #define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) #define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) -#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) +#define I915_HW_INVARIANT (1<<(I915_MAX_CACHE+1)) +#define I915_HW_FLUSH (1<<(I915_MAX_CACHE+1)) + +/* hw flush handling */ +#define I915_FLUSH_CACHE 1 +#define I915_PIPELINE_FLUSH 2 + +/* split up static state */ +#define I915_DST_BUF_COLOR 1 +#define I915_DST_BUF_DEPTH 2 +#define I915_DST_VARS 4 +#define I915_DST_RECT 8 + +static INLINE +void i915_set_flush_dirty(struct i915_context *i915, unsigned flush) +{ + i915->hardware_dirty |= I915_HW_FLUSH; + i915->flush_dirty |= flush; +} /*********************************************************************** @@ -297,14 +354,20 @@ void i915_emit_hardware_state(struct i915_context *i915 ); /*********************************************************************** * i915_clear.c: */ -void i915_clear( struct pipe_context *pipe, unsigned buffers, const float *rgba, - double depth, unsigned stencil); +void i915_clear_blitter(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil); +void i915_clear_render(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil); +void i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil, + unsigned destx, unsigned desty, unsigned width, unsigned height); /*********************************************************************** * */ void i915_init_state_functions( struct i915_context *i915 ); +void i915_init_fixup_state_functions( struct i915_context *i915 ); void i915_init_flush_functions( struct i915_context *i915 ); void i915_init_string_functions( struct i915_context *i915 ); diff --git a/src/gallium/drivers/i915/i915_debug.c 
b/src/gallium/drivers/i915/i915_debug.c index d7150c99c4..c4eed473e9 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -46,12 +46,18 @@ static const struct debug_named_value debug_options[] = { }; unsigned i915_debug = 0; -boolean i915_tiling = TRUE; -void i915_debug_init(struct i915_screen *screen) +DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0) +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(i915_lie, "I915_LIE", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(i915_use_blitter, "I915_USE_BLITTER", FALSE) + +void i915_debug_init(struct i915_screen *is) { - i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0); - i915_tiling = !debug_get_bool_option("I915_NO_TILING", FALSE); + i915_debug = debug_get_option_i915_debug(); + is->debug.tiling = !debug_get_option_i915_no_tiling(); + is->debug.lie = debug_get_option_i915_lie(); + is->debug.use_blitter = debug_get_option_i915_use_blitter(); } @@ -948,7 +954,8 @@ i915_dump_dirty(struct i915_context *i915, const char *func) {I915_NEW_DEPTH_STENCIL, "depth_stencil"}, {I915_NEW_SAMPLER, "sampler"}, {I915_NEW_SAMPLER_VIEW, "sampler_view"}, - {I915_NEW_CONSTANTS, "constants"}, + {I915_NEW_VS_CONSTANTS, "vs_const"}, + {I915_NEW_FS_CONSTANTS, "fs_const"}, {I915_NEW_VBO, "vbo"}, {I915_NEW_VS, "vs"}, {0, NULL}, @@ -976,7 +983,7 @@ i915_dump_hardware_dirty(struct i915_context *i915, const char *func) {I915_HW_PROGRAM, "program"}, {I915_HW_CONSTANTS, "constants"}, {I915_HW_IMMEDIATE, "immediate"}, - {I915_HW_INVARIENT, "invarient"}, + {I915_HW_INVARIANT, "invariant"}, {0, NULL}, }; int i; diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h index 11af7662f0..fa60799d0c 100644 --- a/src/gallium/drivers/i915/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h @@ -46,7 +46,6 @@ struct i915_winsys_batchbuffer; #define DBG_CONSTANTS 0x20 extern unsigned i915_debug; -extern 
boolean i915_tiling; #ifdef DEBUG static INLINE boolean diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index a2c70b1199..b4e81147c4 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -39,34 +39,12 @@ static void i915_flush_pipe( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence ) { struct i915_context *i915 = i915_context(pipe); draw_flush(i915->draw); -#if 0 - /* Do we need to emit an MI_FLUSH command to flush the hardware - * caches? - */ - if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { - unsigned flush = MI_FLUSH; - - if (!(flags & PIPE_FLUSH_RENDER_CACHE)) - flush |= INHIBIT_FLUSH_RENDER_CACHE; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) - flush |= FLUSH_MAP_CACHE; - - if (!BEGIN_BATCH(1, 0)) { - FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(1, 0)); - } - OUT_BATCH( flush ); - } -#endif - if (i915->batch->map == i915->batch->ptr) { return; } @@ -74,7 +52,6 @@ static void i915_flush_pipe( struct pipe_context *pipe, /* If there are no flags, just flush pending commands to hardware: */ FLUSH_BATCH(fence); - i915->vbo_flushed = 1; I915_DBG(DBG_FLUSH, "%s: #####\n", __FUNCTION__); } @@ -93,5 +70,11 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) struct i915_winsys_batchbuffer *batch = i915->batch; batch->iws->batchbuffer_flush(batch, fence); + i915->vbo_flushed = 1; i915->hardware_dirty = ~0; + i915->immediate_dirty = ~0; + i915->dynamic_dirty = ~0; + i915->static_dirty = ~0; + /* kernel emits flushes in between batchbuffers */ + i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 25c53210be..b145b58be3 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -924,6 +924,14 @@ i915_translate_instructions(struct i915_fp_compile *p, tgsi_parse_token( &parse ); 
switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_PROPERTY: + /* + * We only support one cbuf, but we still need to ignore the property + * correctly so we don't hit the assert at the end of the switch case. + */ + assert(parse.FullToken.FullProperty.Property.PropertyName == + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); + break; case TGSI_TOKEN_TYPE_DECLARATION: if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) { @@ -1166,15 +1174,24 @@ void i915_translate_fragment_program( struct i915_context *i915, struct i915_fragment_shader *fs) { - struct i915_fp_compile *p = i915_init_compile(i915, fs); + struct i915_fp_compile *p; const struct tgsi_token *tokens = fs->state.tokens; - i915_find_wpos_space(p); - #if 0 tgsi_dump(tokens, 0); #endif + /* hw doesn't seem to like empty frag programs, even when the depth write + * fixup gets emitted below - may that one is fishy, too? */ + if (fs->info.num_instructions == 1) { + i915_use_passthrough_shader(fs); + + return; + } + + p = i915_init_compile(i915, fs); + i915_find_wpos_space(p); + i915_translate_instructions(p, tokens); i915_fixup_depth_write(p); diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c index dd997e2cf4..85656cd784 100644 --- a/src/gallium/drivers/i915/i915_prim_emit.c +++ b/src/gallium/drivers/i915/i915_prim_emit.c @@ -144,15 +144,14 @@ emit_prim( struct draw_stage *stage, vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ assert(vertex_size >= 12); /* never smaller than 12 bytes */ - if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ - i915_update_derived( i915 ); i915_emit_hardware_state( i915 ); - if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) { assert(0); return; } diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c 
b/src/gallium/drivers/i915/i915_prim_vbuf.c index baebbc7bae..79db3b650e 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -181,6 +181,7 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) struct i915_winsys *iws = i915->iws; if (i915_render->vbo) { + iws->buffer_unmap(iws, i915_render->vbo); iws->buffer_destroy(iws, i915_render->vbo); /* * XXX If buffers where referenced then this should be done in @@ -208,6 +209,7 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, I915_NEW_VERTEX); + i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); } /** @@ -262,16 +264,13 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct i915_winsys *iws = i915->iws; if (i915->vbo_flushed) debug_printf("%s bad vbo flush occured stalling on hw\n", __FUNCTION__); #ifdef VBUF_MAP_BUFFER - i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_sw_offset; #else - (void)iws; return (unsigned char *)i915_render->vbo_ptr; #endif } @@ -288,7 +287,7 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, i915_render->vbo_max_index = max_index; i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1)); #ifdef VBUF_MAP_BUFFER - iws->buffer_unmap(iws, i915_render->vbo); + (void)iws; #else i915_render->map_used_start = i915_render->vertex_size * min_index; i915_render->map_used_end = i915_render->vertex_size * (max_index + 1); @@ -466,16 +465,15 @@ draw_arrays_fallback(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { 
FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ - i915_update_derived(i915); i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { assert(0); goto out; } @@ -515,16 +513,15 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(2, 0)) { + if (!BEGIN_BATCH(2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ - i915_update_derived(i915); i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(2, 0)) { + if (!BEGIN_BATCH(2)) { assert(0); goto out; } @@ -636,16 +633,15 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ - i915_update_derived(i915); i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { assert(0); goto out; } @@ -684,6 +680,15 @@ static void i915_vbuf_render_destroy(struct vbuf_render *render) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct i915_winsys *iws = i915->iws; + + if (i915_render->vbo) { + i915->vbo = NULL; + iws->buffer_unmap(iws, i915_render->vbo); + iws->buffer_destroy(iws, i915_render->vbo); + } + FREE(i915_render); } diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h index 5e4e80ddf6..6fe032cdb6 100644 --- a/src/gallium/drivers/i915/i915_reg.h +++ b/src/gallium/drivers/i915/i915_reg.h @@ -148,6 +148,7 @@ /* p161 */ #define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) /* Dword 1 */ +#define CLASSIC_EARLY_DEPTH (1<<31) #define TEX_DEFAULT_COLOR_OGL (0<<30) 
#define TEX_DEFAULT_COLOR_D3D (1<<30) #define ZR_EARLY_DEPTH (1<<29) diff --git a/src/gallium/drivers/i915/i915_resource.c b/src/gallium/drivers/i915/i915_resource.c index 499233ceb9..7f52ba11d6 100644 --- a/src/gallium/drivers/i915/i915_resource.c +++ b/src/gallium/drivers/i915/i915_resource.c @@ -31,7 +31,6 @@ i915_resource_from_handle(struct pipe_screen * screen, void i915_init_resource_functions(struct i915_context *i915 ) { - i915->base.is_resource_referenced = u_default_is_resource_referenced; i915->base.get_transfer = u_get_transfer_vtbl; i915->base.transfer_map = u_transfer_map_vtbl; i915->base.transfer_flush_region = u_transfer_flush_region_vtbl; diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c b/src/gallium/drivers/i915/i915_resource_buffer.c index d3d6a6752a..d02c768703 100644 --- a/src/gallium/drivers/i915/i915_resource_buffer.c +++ b/src/gallium/drivers/i915/i915_resource_buffer.c @@ -60,6 +60,38 @@ i915_buffer_destroy(struct pipe_screen *screen, } +static struct pipe_transfer * +i915_get_transfer(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); + + if (transfer == NULL) + return NULL; + + transfer->resource = resource; + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; + + /* Note strides are zero, this is ok for buffers, but not for + * textures 2d & higher at least. 
+ */ + return transfer; +} + +static void +i915_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct i915_context *i915 = i915_context(pipe); + util_slab_free(&i915->transfer_pool, transfer); +} + static void * i915_buffer_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) @@ -91,9 +123,8 @@ struct u_resource_vtbl i915_buffer_vtbl = { i915_buffer_get_handle, /* get_handle */ i915_buffer_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ + i915_get_transfer, /* get_transfer */ + i915_transfer_destroy, /* transfer_destroy */ i915_buffer_transfer_map, /* transfer_map */ u_default_transfer_flush_region, /* transfer_flush_region */ u_default_transfer_unmap, /* transfer_unmap */ @@ -115,8 +146,7 @@ i915_buffer_create(struct pipe_screen *screen, buf->b.vtbl = &i915_buffer_vtbl; pipe_reference_init(&buf->b.b.reference, 1); buf->b.b.screen = screen; - - buf->data = MALLOC(template->width0); + buf->data = align_malloc(template->width0, 16); buf->free_on_destroy = TRUE; if (!buf->data) diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index f19106f341..7816925d23 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -172,19 +172,22 @@ i915_texture_set_image_offset(struct i915_texture *tex, } static enum i915_winsys_buffer_tile -i915_texture_tiling(struct pipe_resource *pt) +i915_texture_tiling(struct i915_screen *is, struct i915_texture *tex) { - if (!i915_tiling) + if (!is->debug.tiling) return I915_TILE_NONE; - if (pt->target == PIPE_TEXTURE_1D) + if (tex->b.b.target == PIPE_TEXTURE_1D) return I915_TILE_NONE; - if (util_format_is_s3tc(pt->format)) + if (util_format_is_s3tc(tex->b.b.format)) /* XXX X-tiling might make sense */ return I915_TILE_NONE; - return I915_TILE_X; + if 
(is->debug.use_blitter) + return I915_TILE_X; + else + return I915_TILE_Y; } @@ -401,11 +404,7 @@ i915_texture_layout_3d(struct i915_texture *tex) static boolean i915_texture_layout(struct i915_texture * tex) { - struct pipe_resource *pt = &tex->b.b; - - tex->tiling = i915_texture_tiling(pt); - - switch (pt->target) { + switch (tex->b.b.target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: @@ -649,11 +648,7 @@ i945_texture_layout_cube(struct i915_texture *tex) static boolean i945_texture_layout(struct i915_texture * tex) { - struct pipe_resource *pt = &tex->b.b; - - tex->tiling = i915_texture_tiling(pt); - - switch (pt->target) { + switch (tex->b.b.target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: @@ -664,7 +659,7 @@ i945_texture_layout(struct i915_texture * tex) i945_texture_layout_3d(tex); break; case PIPE_TEXTURE_CUBE: - if (!util_format_is_s3tc(pt->format)) + if (!util_format_is_s3tc(tex->b.b.format)) i9x5_texture_layout_cube(tex); else i945_texture_layout_cube(tex); @@ -716,14 +711,16 @@ i915_texture_destroy(struct pipe_screen *screen, } static struct pipe_transfer * -i915_texture_get_transfer(struct pipe_context *context, +i915_texture_get_transfer(struct pipe_context *pipe, struct pipe_resource *resource, unsigned level, unsigned usage, const struct pipe_box *box) { + struct i915_context *i915 = i915_context(pipe); struct i915_texture *tex = i915_texture(resource); - struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer); + struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); + if (transfer == NULL) return NULL; @@ -737,6 +734,14 @@ i915_texture_get_transfer(struct pipe_context *context, return transfer; } +static void +i915_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct i915_context *i915 = i915_context(pipe); + util_slab_free(&i915->transfer_pool, transfer); +} + static void * i915_texture_transfer_map(struct pipe_context *pipe, struct 
pipe_transfer *transfer) @@ -754,6 +759,9 @@ i915_texture_transfer_map(struct pipe_context *pipe, assert(box->z == 0); offset = i915_texture_offset(tex, transfer->level, box->z); + /* TODO this is a sledgehammer */ + pipe->flush(pipe, NULL); + map = iws->buffer_map(iws, tex->buffer, (transfer->usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE); if (map == NULL) @@ -779,9 +787,8 @@ struct u_resource_vtbl i915_texture_vtbl = { i915_texture_get_handle, /* get_handle */ i915_texture_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ i915_texture_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ + i915_transfer_destroy, /* transfer_destroy */ i915_texture_transfer_map, /* transfer_map */ u_default_transfer_flush_region, /* transfer_flush_region */ i915_texture_transfer_unmap, /* transfer_unmap */ @@ -808,6 +815,8 @@ i915_texture_create(struct pipe_screen *screen, pipe_reference_init(&tex->b.b.reference, 1); tex->b.b.screen = screen; + tex->tiling = i915_texture_tiling(is, tex); + if (is->is_i945) { if (!i945_texture_layout(tex)) goto fail; diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index f66478e729..e62b609eb5 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -35,7 +35,6 @@ #include "i915_debug.h" #include "i915_context.h" #include "i915_screen.h" -#include "i915_surface.h" #include "i915_resource.h" #include "i915_winsys.h" #include "i915_public.h" @@ -99,59 +98,84 @@ i915_get_name(struct pipe_screen *screen) } static int -i915_get_param(struct pipe_screen *screen, enum pipe_cap param) +i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) { - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 8; + struct i915_screen *is = i915_screen(screen); + + switch (cap) { + /* Supported features (boolean 
caps). */ + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_NPOT_TEXTURES: - return 1; + case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */ + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_TWO_SIDED_STENCIL: return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; + + /* Features that should be supported (boolean caps). */ + /* XXX: Just test the code */ + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + /* XXX: No code but hw supports it */ case PIPE_CAP_POINT_SPRITE: + /* Also lie about these when asked to (needed for GLSL / GL 2.0) */ + return is->debug.lie ? 1 : 0; + + /* Unsupported features (boolean caps). */ + case PIPE_CAP_ARRAY_TEXTURES: + case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TIMER_QUERY: return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; + + /* Features we can lie about (boolean caps). */ + case PIPE_CAP_GLSL: case PIPE_CAP_OCCLUSION_QUERY: + return is->debug.lie ? 1 : 0; + + /* Texturing. */ + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 8; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: return 0; - case PIPE_CAP_TIMER_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return I915_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return I915_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return I915_MAX_TEXTURE_2D_LEVELS; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + + /* Fragment coordinate conventions. 
*/ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - /* disable for now */ - return 0; + default: + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } static int -i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) +i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap cap) { switch(shader) { case PIPE_SHADER_VERTEX: - return draw_get_shader_param(shader, param); + return draw_get_shader_param(shader, cap); case PIPE_SHADER_FRAGMENT: break; default: @@ -159,7 +183,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha } /* XXX: these are just shader model 2.0 values, fix this! */ - switch(param) { + switch(cap) { case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: return 96; case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: @@ -192,15 +216,15 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha case PIPE_SHADER_CAP_SUBROUTINES: return 0; default: - assert(0); + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } static float -i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param) +i915_get_paramf(struct pipe_screen *screen, enum pipe_cap cap) { - switch (param) { + switch(cap) { case PIPE_CAP_MAX_LINE_WIDTH: /* fall-through */ case PIPE_CAP_MAX_LINE_WIDTH_AA: @@ -218,6 +242,7 @@ i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param) return 16.0; default: + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } @@ -227,8 +252,7 @@ i915_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { static const enum pipe_format 
tex_supported[] = { PIPE_FORMAT_B8G8R8A8_UNORM, @@ -295,24 +319,23 @@ i915_fence_reference(struct pipe_screen *screen, is->iws->fence_reference(is->iws, ptr, fence); } -static int +static boolean i915_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct i915_screen *is = i915_screen(screen); - return is->iws->fence_signalled(is->iws, fence); + return is->iws->fence_signalled(is->iws, fence) == 0; } -static int +static boolean i915_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct i915_screen *is = i915_screen(screen); - return is->iws->fence_finish(is->iws, fence); + return is->iws->fence_finish(is->iws, fence) == 0; } @@ -322,6 +345,20 @@ i915_fence_finish(struct pipe_screen *screen, static void +i915_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_resource *resource, + unsigned level, unsigned layer, + void *winsys_drawable_handle) +{ + /* XXX: Dummy right now. 
*/ + (void)screen; + (void)resource; + (void)level; + (void)layer; + (void)winsys_drawable_handle; +} + +static void i915_destroy_screen(struct pipe_screen *screen) { struct i915_screen *is = i915_screen(screen); @@ -372,6 +409,7 @@ i915_screen_create(struct i915_winsys *iws) is->base.winsys = NULL; is->base.destroy = i915_destroy_screen; + is->base.flush_frontbuffer = i915_flush_frontbuffer; is->base.get_name = i915_get_name; is->base.get_vendor = i915_get_vendor; diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h index 0c4186c68e..cfc585b535 100644 --- a/src/gallium/drivers/i915/i915_screen.h +++ b/src/gallium/drivers/i915/i915_screen.h @@ -45,16 +45,12 @@ struct i915_screen struct i915_winsys *iws; boolean is_i945; -}; - -/** - * Subclass of pipe_transfer - */ -struct i915_transfer -{ - struct pipe_transfer base; - unsigned offset; + struct { + boolean tiling; + boolean lie; + boolean use_blitter; + } debug; }; @@ -69,11 +65,5 @@ i915_screen(struct pipe_screen *pscreen) return (struct i915_screen *) pscreen; } -static INLINE struct i915_transfer * -i915_transfer(struct pipe_transfer *transfer) -{ - return (struct i915_transfer *)transfer; -} - #endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index bbfcff6bc4..1b57c5776f 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -33,6 +33,7 @@ #include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" #include "i915_context.h" @@ -57,10 +58,8 @@ translate_wrap_mode(unsigned wrap) return TEXCOORDMODE_CLAMP_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; - /* - case PIPE_TEX_WRAP_MIRRORED_REPEAT: + case PIPE_TEX_WRAP_MIRROR_REPEAT: return TEXCOORDMODE_MIRROR; - */ default: return TEXCOORDMODE_WRAP; } @@ -288,6 +287,17 @@ i915_create_sampler_state(struct 
pipe_context *pipe, return cso; } +static void i915_fixup_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->saved_nr_samplers = num; + memcpy(&i915->saved_samplers, sampler, sizeof(void *) * num); + + i915->saved_bind_sampler_states(pipe, num, sampler); +} + static void i915_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { @@ -467,6 +477,17 @@ i915_create_fs_state(struct pipe_context *pipe, } static void +i915_fixup_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->saved_fs = shader; + + i915->saved_bind_fs_state(pipe, shader); +} + +static void i915_bind_fs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); @@ -506,6 +527,8 @@ static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); + i915->saved_vs = shader; + /* just pass-through to draw module */ draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); @@ -525,32 +548,74 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, struct pipe_resource *buf) { struct i915_context *i915 = i915_context(pipe); - draw_flush(i915->draw); + unsigned new_num = 0; + boolean diff = TRUE; - /* Make a copy of shader constants. - * During fragment program translation we may add additional - * constants to the array. - * - * We want to consider the situation where some user constants - * (ex: a material color) may change frequently but the shader program - * stays the same. In that case we should only be updating the first - * N constants, leaving any extras from shader translation alone. 
- */ + + /* XXX don't support geom shaders now */ + if (shader == PIPE_SHADER_GEOMETRY) + return; + + /* if we have a new buffer compare it with the old one */ if (buf) { - struct i915_buffer *ir = i915_buffer(buf); - memcpy(i915->current.constants[shader], ir->data, ir->b.b.width0); - i915->current.num_user_constants[shader] = (ir->b.b.width0 / - 4 * sizeof(float)); - } - else { - i915->current.num_user_constants[shader] = 0; + struct i915_buffer *ibuf = i915_buffer(buf); + struct pipe_resource *old_buf = i915->constants[shader]; + struct i915_buffer *old = old_buf ? i915_buffer(old_buf) : NULL; + unsigned old_num = i915->current.num_user_constants[shader]; + + new_num = ibuf->b.b.width0 / 4 * sizeof(float); + + if (old_num == new_num) { + if (old_num == 0) + diff = FALSE; +#if 0 + /* XXX no point in running this code since st/mesa only uses user buffers */ + /* Can't compare the buffer data since they are userbuffers */ + else if (old && old->free_on_destroy) + diff = memcmp(old->data, ibuf->data, ibuf->b.b.width0); +#else + (void)old; +#endif + } + } else { + diff = i915->current.num_user_constants[shader] != 0; } + /* + * flush before updateing the state. + */ + if (diff && shader == PIPE_SHADER_FRAGMENT) + draw_flush(i915->draw); + + pipe_resource_reference(&i915->constants[shader], buf); + i915->current.num_user_constants[shader] = new_num; - i915->dirty |= I915_NEW_CONSTANTS; + if (diff) + i915->dirty |= shader == PIPE_SHADER_VERTEX ? 
I915_NEW_VS_CONSTANTS : I915_NEW_FS_CONSTANTS; } +static void +i915_fixup_set_fragment_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) +{ + struct i915_context *i915 = i915_context(pipe); + int i; + + for (i = 0; i < num; i++) + pipe_sampler_view_reference(&i915->saved_sampler_views[i], + views[i]); + + for (i = num; i < i915->saved_nr_sampler_views; i++) + pipe_sampler_view_reference(&i915->saved_sampler_views[i], + NULL); + + i915->saved_nr_sampler_views = num; + + i915->saved_set_sampler_views(pipe, num, views); +} + static void i915_set_fragment_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -622,7 +687,8 @@ static void i915_set_framebuffer_state(struct pipe_context *pipe, i915->framebuffer.height = fb->height; i915->framebuffer.nr_cbufs = fb->nr_cbufs; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&i915->framebuffer.cbufs[i], fb->cbufs[i]); + pipe_surface_reference(&i915->framebuffer.cbufs[i], + i < fb->nr_cbufs ? fb->cbufs[i] : NULL); } pipe_surface_reference(&i915->framebuffer.zsbuf, fb->zsbuf); @@ -637,6 +703,8 @@ static void i915_set_clip_state( struct pipe_context *pipe, struct i915_context *i915 = i915_context(pipe); draw_flush(i915->draw); + i915->saved_clip = *clip; + draw_set_clip_state(i915->draw, clip); i915->dirty |= I915_NEW_CLIP; @@ -667,7 +735,7 @@ i915_create_rasterizer_state(struct pipe_context *pipe, { struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state ); - cso->templ = rasterizer; + cso->templ = *rasterizer; cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; cso->light_twoside = rasterizer->light_twoside; cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE; @@ -738,7 +806,7 @@ static void i915_bind_rasterizer_state( struct pipe_context *pipe, /* pass-through to draw module */ draw_set_rasterizer_state(i915->draw, - (i915->rasterizer ? i915->rasterizer->templ : NULL), + (i915->rasterizer ? 
&(i915->rasterizer->templ) : NULL), raster); i915->dirty |= I915_NEW_RASTERIZER; @@ -755,16 +823,28 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffers) { struct i915_context *i915 = i915_context(pipe); - /* Because we change state before the draw_set_vertex_buffers call - * we need a flush here, just to be sure. - */ - draw_flush(i915->draw); + struct draw_context *draw = i915->draw; + int i; - memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); - i915->num_vertex_buffers = count; + util_copy_vertex_buffers(i915->saved_vertex_buffers, + &i915->saved_nr_vertex_buffers, + buffers, count); +#if 0 + /* XXX doesn't look like this is needed */ + /* unmap old */ + for (i = 0; i < i915->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + } +#endif /* pass-through to draw module */ - draw_set_vertex_buffers(i915->draw, count, buffers); + draw_set_vertex_buffers(draw, count, buffers); + + /* map new */ + for (i = 0; i < count; i++) { + void *buf = i915_buffer(buffers[i].buffer)->data; + draw_set_mapped_vertex_buffer(draw, i, buf); + } } static void * @@ -789,10 +869,7 @@ i915_bind_vertex_elements_state(struct pipe_context *pipe, struct i915_context *i915 = i915_context(pipe); struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems; - /* Because we change state before the draw_set_vertex_buffers call - * we need a flush here, just to be sure. 
- */ - draw_flush(i915->draw); + i915->saved_velems = velems; /* pass-through to draw module */ if (i915_velems) { @@ -870,4 +947,16 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; i915->base.set_index_buffer = i915_set_index_buffer; + i915->base.redefine_user_buffer = u_default_redefine_user_buffer; +} + +void +i915_init_fixup_state_functions( struct i915_context *i915 ) +{ + i915->saved_bind_fs_state = i915->base.bind_fs_state; + i915->base.bind_fs_state = i915_fixup_bind_fs_state; + i915->saved_bind_sampler_states = i915->base.bind_fragment_sampler_states; + i915->base.bind_fragment_sampler_states = i915_fixup_bind_sampler_states; + i915->saved_set_sampler_views = i915->base.set_fragment_sampler_views; + i915->base.set_fragment_sampler_views = i915_fixup_set_fragment_sampler_views; } diff --git a/src/gallium/drivers/i915/i915_state.h b/src/gallium/drivers/i915/i915_state.h index b4074dc35b..3f4e40294e 100644 --- a/src/gallium/drivers/i915/i915_state.h +++ b/src/gallium/drivers/i915/i915_state.h @@ -48,6 +48,7 @@ extern struct i915_tracked_state i915_hw_immediate; extern struct i915_tracked_state i915_hw_dynamic; extern struct i915_tracked_state i915_hw_fs; extern struct i915_tracked_state i915_hw_framebuffer; +extern struct i915_tracked_state i915_hw_dst_buf_vars; extern struct i915_tracked_state i915_hw_constants; void i915_update_derived(struct i915_context *i915); diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 1d4026a214..59ac2f7292 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -165,6 +165,7 @@ static struct i915_tracked_state *atoms[] = { &i915_hw_dynamic, &i915_hw_fs, &i915_hw_framebuffer, + &i915_hw_dst_buf_vars, &i915_hw_constants, NULL, }; diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c 
b/src/gallium/drivers/i915/i915_state_dynamic.c index d61a8c3407..204cee6fe9 100644 --- a/src/gallium/drivers/i915/i915_state_dynamic.c +++ b/src/gallium/drivers/i915/i915_state_dynamic.c @@ -46,18 +46,34 @@ * (active) state every time a 4kb boundary is crossed. */ -static INLINE void set_dynamic_indirect(struct i915_context *i915, - unsigned offset, - const unsigned *src, - unsigned dwords) +static INLINE void set_dynamic(struct i915_context *i915, + unsigned offset, + const unsigned state) +{ + if (i915->current.dynamic[offset] == state) + return; + + i915->current.dynamic[offset] = state; + i915->dynamic_dirty |= 1 << offset; + i915->hardware_dirty |= I915_HW_DYNAMIC; +} + + + +static INLINE void set_dynamic_array(struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords) { unsigned i; if (!memcmp(src, &i915->current.dynamic[offset], dwords * 4)) return; - for (i = 0; i < dwords; i++) + for (i = 0; i < dwords; i++) { i915->current.dynamic[offset + i] = src[i]; + i915->dynamic_dirty |= 1 << (offset + i); + } i915->hardware_dirty |= I915_HW_DYNAMIC; } @@ -79,12 +95,7 @@ static void upload_MODES4(struct i915_context *i915) */ modes4 |= i915->blend->modes4; - /* Always, so that we know when state is in-active: - */ - set_dynamic_indirect(i915, - I915_DYNAMIC_MODES4, - &modes4, - 1); + set_dynamic(i915, I915_DYNAMIC_MODES4, modes4); } const struct i915_tracked_state i915_upload_MODES4 = { @@ -107,10 +118,7 @@ static void upload_BFO(struct i915_context *i915) bfo[0] |= i915->stencil_ref.ref_value[1] << BFO_STENCIL_REF_SHIFT; } - set_dynamic_indirect(i915, - I915_DYNAMIC_BFO_0, - &(bfo[0]), - 2); + set_dynamic_array(i915, I915_DYNAMIC_BFO_0, bfo, 2); } const struct i915_tracked_state i915_upload_BFO = { @@ -141,10 +149,7 @@ static void upload_BLENDCOLOR(struct i915_context *i915) color[3]); } - set_dynamic_indirect(i915, - I915_DYNAMIC_BC_0, - bc, - 2); + set_dynamic_array(i915, I915_DYNAMIC_BC_0, bc, 2); } const struct i915_tracked_state 
i915_upload_BLENDCOLOR = { @@ -161,10 +166,7 @@ static void upload_IAB(struct i915_context *i915) { unsigned iab = i915->blend->iab; - set_dynamic_indirect(i915, - I915_DYNAMIC_IAB, - &iab, - 1); + set_dynamic(i915, I915_DYNAMIC_IAB, iab); } const struct i915_tracked_state i915_upload_IAB = { @@ -179,10 +181,8 @@ const struct i915_tracked_state i915_upload_IAB = { */ static void upload_DEPTHSCALE(struct i915_context *i915) { - set_dynamic_indirect(i915, - I915_DYNAMIC_DEPTHSCALE_0, - &(i915->rasterizer->ds[0].u), - 2); + set_dynamic_array(i915, I915_DYNAMIC_DEPTHSCALE_0, + &i915->rasterizer->ds[0].u, 2); } const struct i915_tracked_state i915_upload_DEPTHSCALE = { @@ -234,10 +234,7 @@ static void upload_STIPPLE(struct i915_context *i915) (p[3] << 12)); } - set_dynamic_indirect(i915, - I915_DYNAMIC_STP_0, - &st[0], - 2); + set_dynamic_array(i915, I915_DYNAMIC_STP_0, st, 2); } const struct i915_tracked_state i915_upload_STIPPLE = { @@ -253,10 +250,7 @@ const struct i915_tracked_state i915_upload_STIPPLE = { */ static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) { - set_dynamic_indirect(i915, - I915_DYNAMIC_SC_ENA_0, - &(i915->rasterizer->sc[0]), - 1); + set_dynamic(i915, I915_DYNAMIC_SC_ENA_0, i915->rasterizer->sc[0]); } const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { @@ -282,10 +276,7 @@ static void upload_SCISSOR_RECT(struct i915_context *i915) sc[1] = (y1 << 16) | (x1 & 0xffff); sc[2] = (y2 << 16) | (x2 & 0xffff); - set_dynamic_indirect(i915, - I915_DYNAMIC_SC_RECT_0, - &sc[0], - 3); + set_dynamic_array(i915, I915_DYNAMIC_SC_RECT_0, sc, 3); } const struct i915_tracked_state i915_upload_SCISSOR_RECT = { diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index c48d53ffbb..0155cd8351 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -35,411 +35,425 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -static unsigned 
translate_format( enum pipe_format format ) +#include "util/u_math.h" +#include "util/u_memory.h" + +struct i915_tracked_hw_state { + const char *name; + void (*validate)(struct i915_context *, unsigned *batch_space); + void (*emit)(struct i915_context *); + unsigned dirty, batch_space; +}; + + +static void +validate_flush(struct i915_context *i915, unsigned *batch_space) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - return COLOR_BUF_ARGB8888; - case PIPE_FORMAT_B5G6R5_UNORM: - return COLOR_BUF_RGB565; - default: - assert(0); - return 0; - } + *batch_space = i915->flush_dirty ? 1 : 0; } -static unsigned translate_depth_format( enum pipe_format zformat ) +static void +emit_flush(struct i915_context *i915) { - switch (zformat) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return DEPTH_FRMT_24_FIXED_8_OTHER; - case PIPE_FORMAT_Z16_UNORM: - return DEPTH_FRMT_16_FIXED; - default: - assert(0); - return 0; - } + /* Cache handling is very cheap atm. State handling can request two flushes: + * - I915_FLUSH_CACHE which is a flush everything request and + * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush. + * Because the cache handling is so dumb, no explicit "invalidate map cache". + * Also, the first is a strict superset of the latter, so the following logic + * works. */ + if (i915->flush_dirty & I915_FLUSH_CACHE) + OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE); + else if (i915->flush_dirty & I915_PIPELINE_FLUSH) + OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); } +uint32_t invariant_state[] = { + _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0, -/** - * Examine framebuffer state to determine width, height.
- */ -static boolean -framebuffer_size(const struct pipe_framebuffer_state *fb, - uint *width, uint *height) + _3DSTATE_DFLT_DIFFUSE_CMD, 0, + + _3DSTATE_DFLT_SPEC_CMD, 0, + + _3DSTATE_DFLT_Z_CMD, 0, + + _3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7), + + _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D, + + _3DSTATE_DEPTH_SUBRECT_DISABLE, + + /* disable indirect state for now + */ + _3DSTATE_LOAD_INDIRECT | 0, 0}; + +static void +emit_invariant(struct i915_context *i915) { - if (fb->cbufs[0]) { - *width = fb->cbufs[0]->width; - *height = fb->cbufs[0]->height; - return TRUE; - } - else if (fb->zsbuf) { - *width = fb->zsbuf->width; - *height = fb->zsbuf->height; - return TRUE; - } - else { - *width = *height = 0; - return FALSE; - } + i915_winsys_batchbuffer_write(i915->batch, invariant_state, + Elements(invariant_state)*sizeof(uint32_t)); } -static inline uint32_t -buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) +static void +validate_immediate(struct i915_context *i915, unsigned *batch_space) { - uint32_t tiling_bits = 0; - - switch (tiling) { - case I915_TILE_Y: - tiling_bits |= BUF_3D_TILE_WALK_Y; - case I915_TILE_X: - tiling_bits |= BUF_3D_TILED_SURFACE; - case I915_TILE_NONE: - break; - } + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; - return tiling_bits; + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo) + i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo; + + *batch_space = 1 + 
util_bitcount(dirty); } -/* Push the state into the sarea and/or texture memory. - */ -void -i915_emit_hardware_state(struct i915_context *i915 ) +static void +emit_immediate(struct i915_context *i915) { - /* XXX: there must be an easier way */ - const unsigned dwords = ( 14 + - 7 + - I915_MAX_DYNAMIC + - 8 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_MAX_CONSTANT*4 + -#if 0 - i915->current.program_len + -#else - i915->fs->program_len + -#endif - 6 - ) * 3/2; /* plus 50% margin */ - const unsigned relocs = ( I915_TEX_UNITS + - 3 - ) * 3/2; /* plus 50% margin */ + /* remove unwanted bits and S7 */ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + int i, num = util_bitcount(dirty); + assert(num && num <= I915_MAX_IMMEDIATE); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + dirty << 4 | (num - 1)); + + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { + if (i915->vbo) + OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + OUT_BATCH(0); + } - uintptr_t save_ptr; - size_t save_relocs; + for (i = 1; i < I915_MAX_IMMEDIATE; i++) { + if (dirty & (1 << i)) + OUT_BATCH(i915->current.immediate[i]); + } +} - if (I915_DBG_ON(DBG_ATOMS)) - i915_dump_hardware_dirty(i915, __FUNCTION__); +static void +validate_dynamic(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1)); +} - if(!BEGIN_BATCH(dwords, relocs)) { - FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(dwords, relocs)); +static void +emit_dynamic(struct i915_context *i915) +{ + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + if (i915->dynamic_dirty & (1 << i)) + OUT_BATCH(i915->current.dynamic[i]); } +} - save_ptr = (uintptr_t)i915->batch->ptr; - save_relocs =
i915->batch->relocs; - - /* 14 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_INVARIENT) - { - OUT_BATCH(_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); - - OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_Z_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | - CSB_TCB(0, 0) | - CSB_TCB(1, 1) | - CSB_TCB(2, 2) | - CSB_TCB(3, 3) | - CSB_TCB(4, 4) | - CSB_TCB(5, 5) | - CSB_TCB(6, 6) | - CSB_TCB(7, 7)); - - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | - ENABLE_TEXKILL_3D_4D | - TEXKILL_4D); - - /* Need to initialize this to zero. - */ - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); - - /* disable indirect state for now - */ - OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); - OUT_BATCH(0); +static void +validate_static(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = 0; + + if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) { + i915->validation_buffers[i915->num_validation_buffers++] + = i915->current.cbuf_bo; + *batch_space += 3; } - /* 7 dwords, 1 relocs */ - if (i915->hardware_dirty & I915_HW_IMMEDIATE) - { - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | - I1_LOAD_S(1) | - I1_LOAD_S(2) | - I1_LOAD_S(4) | - I1_LOAD_S(5) | - I1_LOAD_S(6) | - (5)); - - if(i915->vbo) - OUT_RELOC(i915->vbo, - I915_USAGE_VERTEX, - i915->current.immediate[I915_IMMEDIATE_S0]); - else - /* FIXME: we should not do this */ - OUT_BATCH(0); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); - 
OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); - } - -#if 01 - /* I915_MAX_DYNAMIC dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_DYNAMIC) - { - int i; - for (i = 0; i < I915_MAX_DYNAMIC; i++) { - OUT_BATCH(i915->current.dynamic[i]); - } + if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) { + i915->validation_buffers[i915->num_validation_buffers++] + = i915->current.depth_bo; + *batch_space += 3; } -#endif -#if 01 - /* 8 dwords, 2 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + if (i915->static_dirty & I915_DST_VARS) + *batch_space += 2; + + if (i915->static_dirty & I915_DST_RECT) + *batch_space += 5; +} - if (cbuf_surface) { - struct i915_texture *tex = i915_texture(cbuf_surface->texture); - assert(tex); +static void +emit_static(struct i915_context *i915) +{ + if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.cbuf_flags); + OUT_RELOC(i915->current.cbuf_bo, + I915_USAGE_RENDER, + 0); + } - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + /* What happens if no zbuf?? 
+ */ + if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.depth_flags); + OUT_RELOC(i915->current.depth_bo, + I915_USAGE_RENDER, + 0); + } - OUT_BATCH(BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - buf_3d_tiling_bits(tex->tiling)); + if (i915->static_dirty & I915_DST_VARS) { + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(i915->current.dst_buf_vars); + } +} - OUT_RELOC(tex->buffer, - I915_USAGE_RENDER, - 0); - } +static void +validate_map(struct i915_context *i915, unsigned *batch_space) +{ + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + struct i915_texture *tex; - /* What happens if no zbuf?? - */ - if (depth_surface) { - struct i915_texture *tex = i915_texture(depth_surface->texture); - unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level, - depth_surface->u.tex.first_layer); - assert(tex); - assert(offset == 0); - - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - - assert(tex); - OUT_BATCH(BUF_3D_ID_DEPTH | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - buf_3d_tiling_bits(tex->tiling)); - - OUT_RELOC(tex->buffer, - I915_USAGE_RENDER, - 0); - } + *batch_space = i915->current.sampler_enable_nr ? 
+ 2 + 3*i915->current.sampler_enable_nr : 0; - { - unsigned cformat, zformat = 0; - - if (cbuf_surface) - cformat = cbuf_surface->format; - else - cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ - cformat = translate_format(cformat); - - if (depth_surface) - zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); - - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - cformat | - zformat ); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + tex = i915_texture(i915->fragment_sampler_views[unit]->texture); + i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; } } -#endif +} -#if 01 - /* texture images */ - /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ - if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) - { - const uint nr = i915->current.sampler_enable_nr; - if (nr) { - const uint enabled = i915->current.sampler_enable_flags; - uint unit; - uint count = 0; - OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); - OUT_BATCH(enabled); - for (unit = 0; unit < I915_TEX_UNITS; unit++) { - if (enabled & (1 << unit)) { - struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - struct i915_winsys_buffer *buf = texture->buffer; - assert(buf); - - count++; - - OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); - OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ - OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ - } - } - assert(count == nr); +static void +emit_map(struct i915_context *i915) +{ + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct i915_texture *texture = 
i915_texture(i915->fragment_sampler_views[unit]->texture); + struct i915_winsys_buffer *buf = texture->buffer; + assert(buf); + + count++; + + OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ } } -#endif + assert(count == nr); + } +} + +static void +validate_sampler(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->current.sampler_enable_nr ? + 2 + 3*i915->current.sampler_enable_nr : 0; +} -#if 01 - /* samplers */ - /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_SAMPLER) - { - if (i915->current.sampler_enable_nr) { - int i; - - OUT_BATCH( _3DSTATE_SAMPLER_STATE | - (3 * i915->current.sampler_enable_nr) ); - - OUT_BATCH( i915->current.sampler_enable_flags ); - - for (i = 0; i < I915_TEX_UNITS; i++) { - if (i915->current.sampler_enable_flags & (1<<i)) { - OUT_BATCH( i915->current.sampler[i][0] ); - OUT_BATCH( i915->current.sampler[i][1] ); - OUT_BATCH( i915->current.sampler[i][2] ); - } +static void +emit_sampler(struct i915_context *i915) +{ + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<<i)) { + OUT_BATCH( i915->current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); } } } -#endif +} + +static void +validate_constants(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->fs->num_constants ? + 2 + 4*i915->fs->num_constants : 0; +} + +static void +emit_constants(struct i915_context *i915) +{ + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. 
+ */ + const uint nr = i915->fs->num_constants; + if (nr) { + uint i; + + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH((1 << nr) - 1); -#if 01 - /* constants */ - /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_CONSTANTS) - { - /* Collate the user-defined constants with the fragment shader's - * immediates according to the constant_flags[] array. - */ - const uint nr = i915->fs->num_constants; - if (nr) { - uint i; - - OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); - OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); - - for (i = 0; i < nr; i++) { - const uint *c; - if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { - /* grab user-defined constant */ - c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; - } - else { - /* emit program constant */ - c = (uint *) i915->fs->constants[i]; - } + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data; + c += 4 * i; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } #if 0 /* debug */ - { - float *f = (float *) c; - printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], - (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER - ? "user" : "immediate")); - } -#endif - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? 
"user" : "immediate")); } +#endif + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); } } -#endif +} -#if 01 - /* Fragment program */ - /* i915->current.program_len dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_PROGRAM) - { +static void +validate_program(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->fs->program_len; +} + +static void +emit_program(struct i915_context *i915) +{ uint i; /* we should always have, at least, a pass-through program */ assert(i915->fs->program_len > 0); for (i = 0; i < i915->fs->program_len; i++) { OUT_BATCH(i915->fs->program[i]); } +} + +static void +emit_draw_rect(struct i915_context *i915) +{ + if (i915->static_dirty & I915_DST_RECT) { + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); + OUT_BATCH(i915->current.draw_offset); + OUT_BATCH(i915->current.draw_size); + OUT_BATCH(i915->current.draw_offset); } -#endif +} -#if 01 - /* drawing surface size */ - /* 6 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - uint w, h; - struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - struct i915_texture *tex = i915_texture(cbuf_surface->texture); - unsigned x, y; - int layer; - uint32_t draw_offset; - boolean ret; +static boolean +i915_validate_state(struct i915_context *i915, unsigned *batch_space) +{ + unsigned tmp; + + i915->num_validation_buffers = 0; + if (i915->hardware_dirty & I915_HW_INVARIANT) + *batch_space = Elements(invariant_state); + else + *batch_space = 0; + +#define VALIDATE_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) { \ + validate_##atom(i915, &tmp); \ + *batch_space += tmp; } + VALIDATE_ATOM(flush, I915_HW_FLUSH); + VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE); + VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC); + VALIDATE_ATOM(static, I915_HW_STATIC); + VALIDATE_ATOM(map, I915_HW_MAP); + VALIDATE_ATOM(sampler, I915_HW_SAMPLER); + VALIDATE_ATOM(constants, I915_HW_CONSTANTS); + VALIDATE_ATOM(program, 
I915_HW_PROGRAM); +#undef VALIDATE_ATOM + + if (i915->num_validation_buffers == 0) + return TRUE; - ret = framebuffer_size(&i915->framebuffer, &w, &h); - assert(ret); + if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers, + i915->num_validation_buffers)) + return FALSE; - layer = cbuf_surface->u.tex.first_layer; + return TRUE; +} - x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx; - y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; +/* Push the state into the sarea and/or texture memory. + */ +void +i915_emit_hardware_state(struct i915_context *i915 ) +{ + unsigned batch_space; + uintptr_t save_ptr; - draw_offset = x | (y << 16); + assert(i915->dirty == 0); - /* XXX flush only required when the draw_offset changes! */ - OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); - OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); - OUT_BATCH(draw_offset); - OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16)); - OUT_BATCH(draw_offset); + if (I915_DBG_ON(DBG_ATOMS)) + i915_dump_hardware_dirty(i915, __FUNCTION__); + + if (!i915_validate_state(i915, &batch_space)) { + FLUSH_BATCH(NULL); + assert(i915_validate_state(i915, &batch_space)); } -#endif - I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__, + if(!BEGIN_BATCH(batch_space)) { + FLUSH_BATCH(NULL); + assert(i915_validate_state(i915, &batch_space)); + assert(BEGIN_BATCH(batch_space)); + } + + save_ptr = (uintptr_t)i915->batch->ptr; + +#define EMIT_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) \ + emit_##atom(i915); + EMIT_ATOM(flush, I915_HW_FLUSH); + EMIT_ATOM(invariant, I915_HW_INVARIANT); + EMIT_ATOM(immediate, I915_HW_IMMEDIATE); + EMIT_ATOM(dynamic, I915_HW_DYNAMIC); + EMIT_ATOM(static, I915_HW_STATIC); + EMIT_ATOM(map, I915_HW_MAP); + EMIT_ATOM(sampler, I915_HW_SAMPLER); + EMIT_ATOM(constants, I915_HW_CONSTANTS); + EMIT_ATOM(program, I915_HW_PROGRAM); + EMIT_ATOM(draw_rect, I915_HW_STATIC); +#undef EMIT_ATOM + + 
I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__, ((uintptr_t)i915->batch->ptr - save_ptr) / 4, - i915->batch->relocs - save_relocs); + batch_space); + assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space); i915->hardware_dirty = 0; + i915->immediate_dirty = 0; + i915->dynamic_dirty = 0; + i915->static_dirty = 0; + i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_fpc.c b/src/gallium/drivers/i915/i915_state_fpc.c index ec7cec0e47..1959a24691 100644 --- a/src/gallium/drivers/i915/i915_state_fpc.c +++ b/src/gallium/drivers/i915/i915_state_fpc.c @@ -40,7 +40,7 @@ static void update_hw_constants(struct i915_context *i915) struct i915_tracked_state i915_hw_constants = { "hw_constants", update_hw_constants, - I915_NEW_CONSTANTS | I915_NEW_FS + I915_NEW_FS_CONSTANTS | I915_NEW_FS }; diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index f9ade7077f..8134864739 100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -36,12 +36,20 @@ #include "util/u_memory.h" -/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. - * Would like to opportunistically recombine all these fragments into - * a single packet containing only what has changed, but for now emit - * as multiple packets. +/* Convenience function to check immediate state.
*/ +static INLINE void set_immediate(struct i915_context *i915, + unsigned offset, + const unsigned state) +{ + if (i915->current.immediate[offset] == state) + return; + + i915->current.immediate[offset] = state; + i915->immediate_dirty |= 1 << offset; + i915->hardware_dirty |= I915_HW_IMMEDIATE; +} @@ -56,9 +64,14 @@ static void upload_S0S1(struct i915_context *i915) */ LIS0 = i915->vbo_offset; + /* Need to force this */ + if (i915->dirty & I915_NEW_VBO) { + i915->immediate_dirty |= 1 << I915_IMMEDIATE_S0; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } + /* I915_NEW_VERTEX_SIZE */ - /* XXX do this where the vertex size is calculated! */ { unsigned vertex_size = i915->current.vertex_info.size; @@ -66,16 +79,8 @@ static void upload_S0S1(struct i915_context *i915) (vertex_size << 16)); } - /* I915_NEW_VBO - */ - if (1 || - i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || - i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) - { - i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; - i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S0, LIS0); + set_immediate(i915, I915_IMMEDIATE_S1, LIS1); } const struct i915_tracked_state i915_upload_S0S1 = { @@ -98,21 +103,13 @@ static void upload_S2S4(struct i915_context *i915) { LIS2 = i915->current.vertex_info.hwfmt[1]; LIS4 = i915->current.vertex_info.hwfmt[0]; - /* - debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); - */ assert(LIS4); /* should never be zero? 
*/ } LIS4 |= i915->rasterizer->LIS4; - if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || - LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { - - i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; - i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S2, LIS2); + set_immediate(i915, I915_IMMEDIATE_S4, LIS4); } const struct i915_tracked_state i915_upload_S2S4 = { @@ -142,15 +139,12 @@ static void upload_S5(struct i915_context *i915) #if 0 /* I915_NEW_RASTERIZER */ - if (i915->state.Polygon->OffsetFill) { + if (i915->rasterizer->LIS7) { LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; } #endif - if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { - i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S5, LIS5); } const struct i915_tracked_state i915_upload_S5 = { @@ -180,14 +174,11 @@ static void upload_S6(struct i915_context *i915) */ LIS6 |= i915->depth_stencil->depth_LIS6; - if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { - i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S6, LIS6); } const struct i915_tracked_state i915_upload_S6 = { - "imm s6", + "imm S6", upload_S6, I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER }; @@ -204,10 +195,9 @@ static void upload_S7(struct i915_context *i915) */ LIS7 = i915->rasterizer->LIS7; - if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { - i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } +#if 0 + set_immediate(i915, I915_IMMEDIATE_S7, LIS7); +#endif } const struct i915_tracked_state i915_upload_S7 = { diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index dc9a4c1e2f..2865298318 100644 --- 
a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -27,17 +27,120 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_state.h" +#include "i915_resource.h" +#include "i915_screen.h" /*********************************************************************** * Update framebuffer state */ +static unsigned translate_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_B5G6R5_UNORM: + return COLOR_BUF_RGB565; + default: + assert(0); + return 0; + } +} + +static unsigned translate_depth_format(enum pipe_format zformat) +{ + switch (zformat) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return DEPTH_FRMT_24_FIXED_8_OTHER; + case PIPE_FORMAT_Z16_UNORM: + return DEPTH_FRMT_16_FIXED; + default: + assert(0); + return 0; + } +} + +static inline uint32_t +buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) +{ + uint32_t tiling_bits = 0; + + switch (tiling) { + case I915_TILE_Y: + tiling_bits |= BUF_3D_TILE_WALK_Y; + case I915_TILE_X: + tiling_bits |= BUF_3D_TILED_SURFACE; + case I915_TILE_NONE: + break; + } + + return tiling_bits; +} + static void update_framebuffer(struct i915_context *i915) { - /* HW emit currently references framebuffer state directly: + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + unsigned x, y; + int layer; + uint32_t draw_offset, draw_size; + + if (cbuf_surface) { + struct i915_texture *tex = i915_texture(cbuf_surface->texture); + assert(tex); + + i915->current.cbuf_bo = tex->buffer; + i915->current.cbuf_flags = BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ + buf_3d_tiling_bits(tex->tiling); + + layer = cbuf_surface->u.tex.first_layer; + + x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx; + y = 
tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; + } else { + i915->current.cbuf_bo = NULL; + x = y = 0; + } + i915->static_dirty |= I915_DST_BUF_COLOR; + + /* What happens if no zbuf?? */ + if (depth_surface) { + struct i915_texture *tex = i915_texture(depth_surface->texture); + unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level, + depth_surface->u.tex.first_layer); + assert(tex); + assert(offset == 0); + + i915->current.depth_bo = tex->buffer; + i915->current.depth_flags = BUF_3D_ID_DEPTH | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ + buf_3d_tiling_bits(tex->tiling); + } else + i915->current.depth_bo = NULL; + i915->static_dirty |= I915_DST_BUF_DEPTH; + + /* drawing rect calculations */ + draw_offset = x | (y << 16); + draw_size = (i915->framebuffer.width - 1 + x) | + ((i915->framebuffer.height - 1 + y) << 16); + if (i915->current.draw_offset != draw_offset) { + i915->current.draw_offset = draw_offset; + i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH); + i915->static_dirty |= I915_DST_RECT; + } + if (i915->current.draw_size != draw_size) { + i915->current.draw_size = draw_size; + i915->static_dirty |= I915_DST_RECT; + } + i915->hardware_dirty |= I915_HW_STATIC; + + /* flush the cache in case we sample from the old renderbuffers */ + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } struct i915_tracked_state i915_hw_framebuffer = { @@ -45,3 +148,52 @@ struct i915_tracked_state i915_hw_framebuffer = { update_framebuffer, I915_NEW_FRAMEBUFFER }; + +static void update_dst_buf_vars(struct i915_context *i915) +{ + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + uint32_t dst_buf_vars, cformat, zformat; + uint32_t early_z = 0; + + if (cbuf_surface) + cformat = cbuf_surface->format; + else + cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ + cformat = translate_format(cformat); + + if (depth_surface) { + struct i915_texture *tex = 
i915_texture(depth_surface->texture); + struct i915_screen *is = i915_screen(i915->base.screen); + + zformat = translate_depth_format(depth_surface->format); + + if (is->is_i945 && tex->tiling != I915_TILE_NONE + && !i915->fs->info.writes_z) + early_z = CLASSIC_EARLY_DEPTH; + } else + zformat = 0; + + dst_buf_vars = DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + cformat | + zformat | + early_z; + + if (i915->current.dst_buf_vars != dst_buf_vars) { + if (early_z != (i915->current.dst_buf_vars & CLASSIC_EARLY_DEPTH)) + i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH); + + i915->current.dst_buf_vars = dst_buf_vars; + i915->static_dirty |= I915_DST_VARS; + i915->hardware_dirty |= I915_HW_STATIC; + } +} + +struct i915_tracked_state i915_hw_dst_buf_vars = { + "dst buf vars", + update_dst_buf_vars, + I915_NEW_FRAMEBUFFER | I915_NEW_FS +}; diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index becc6e93c2..d02c420f6c 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -27,6 +27,7 @@ #include "i915_surface.h" #include "i915_resource.h" +#include "i915_state.h" #include "i915_blit.h" #include "i915_reg.h" #include "i915_screen.h" @@ -37,16 +38,119 @@ #include "util/u_memory.h" #include "util/u_pack_color.h" +/* + * surface functions using the render engine + */ + +static void +i915_surface_copy_render(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct i915_context *i915 = i915_context(pipe); + + util_blitter_save_blend(i915->blitter, (void *)i915->blend); + util_blitter_save_depth_stencil_alpha(i915->blitter, (void *)i915->depth_stencil); + util_blitter_save_stencil_ref(i915->blitter, &i915->stencil_ref); + util_blitter_save_rasterizer(i915->blitter, 
(void *)i915->rasterizer); + util_blitter_save_fragment_shader(i915->blitter, i915->saved_fs); + util_blitter_save_vertex_shader(i915->blitter, i915->saved_vs); + util_blitter_save_viewport(i915->blitter, &i915->viewport); + util_blitter_save_clip(i915->blitter, &i915->saved_clip); + util_blitter_save_vertex_elements(i915->blitter, i915->saved_velems); + util_blitter_save_vertex_buffers(i915->blitter, i915->saved_nr_vertex_buffers, + i915->saved_vertex_buffers); + + util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer); + + util_blitter_save_fragment_sampler_states(i915->blitter, + i915->saved_nr_samplers, + i915->saved_samplers); + util_blitter_save_fragment_sampler_views(i915->blitter, + i915->saved_nr_sampler_views, + i915->saved_sampler_views); + + util_blitter_copy_region(i915->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); +} + +static void +i915_clear_render_target_render(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_framebuffer_state fb_state; + + util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer); + + fb_state.width = dst->width; + fb_state.height = dst->height; + fb_state.nr_cbufs = 1; + fb_state.cbufs[0] = dst; + fb_state.zsbuf = NULL; + pipe->set_framebuffer_state(pipe, &fb_state); + + if (i915->dirty) + i915_update_derived(i915); + + i915_clear_emit(pipe, PIPE_CLEAR_COLOR, rgba, 0.0, 0x0, + dstx, dsty, width, height); + + pipe->set_framebuffer_state(pipe, &i915->blitter->saved_fb_state); + util_unreference_framebuffer_state(&i915->blitter->saved_fb_state); + i915->blitter->saved_fb_state.nr_cbufs = ~0; +} + +static void +i915_clear_depth_stencil_render(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + 
struct i915_context *i915 = i915_context(pipe); + struct pipe_framebuffer_state fb_state; + + util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer); + + fb_state.width = dst->width; + fb_state.height = dst->height; + fb_state.nr_cbufs = 0; + fb_state.zsbuf = dst; + pipe->set_framebuffer_state(pipe, &fb_state); + + if (i915->dirty) + i915_update_derived(i915); + + i915_clear_emit(pipe, clear_flags & PIPE_CLEAR_DEPTHSTENCIL, + NULL, depth, stencil, + dstx, dsty, width, height); + + pipe->set_framebuffer_state(pipe, &i915->blitter->saved_fb_state); + util_unreference_framebuffer_state(&i915->blitter->saved_fb_state); + i915->blitter->saved_fb_state.nr_cbufs = ~0; +} + +/* + * surface functions using the blitter + */ /* Assumes all values are within bounds -- no checking at this level - * do it higher up if required. */ static void -i915_surface_copy(struct pipe_context *pipe, - struct pipe_resource *dst, unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, unsigned src_level, - const struct pipe_box *src_box) +i915_surface_copy_blitter(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) { struct i915_texture *dst_tex = i915_texture(dst); struct i915_texture *src_tex = i915_texture(src); @@ -66,7 +170,6 @@ i915_surface_copy(struct pipe_context *pipe, assert(src_box->z == 0); src_offset = i915_texture_offset(src_tex, src_level, src_box->z); - assert( dst != src ); assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) ); assert( util_format_get_blockwidth(dpt->format) == util_format_get_blockwidth(spt->format) ); assert( util_format_get_blockheight(dpt->format) == util_format_get_blockheight(spt->format) ); @@ -81,13 +184,12 @@ i915_surface_copy(struct pipe_context *pipe, (short) src_box->width, (short) src_box->height ); } - static 
void -i915_clear_render_target(struct pipe_context *pipe, - struct pipe_surface *dst, - const float *rgba, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) +i915_clear_render_target_blitter(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) { struct i915_texture *tex = i915_texture(dst->texture); struct pipe_resource *pt = &tex->b.b; @@ -109,13 +211,13 @@ i915_clear_render_target(struct pipe_context *pipe, } static void -i915_clear_depth_stencil(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) +i915_clear_depth_stencil_blitter(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) { struct i915_texture *tex = i915_texture(dst->texture); struct pipe_resource *pt = &tex->b.b; @@ -193,9 +295,15 @@ i915_surface_destroy(struct pipe_context *ctx, void i915_init_surface_functions(struct i915_context *i915) { - i915->base.resource_copy_region = i915_surface_copy; - i915->base.clear_render_target = i915_clear_render_target; - i915->base.clear_depth_stencil = i915_clear_depth_stencil; + if (i915_screen(i915->base.screen)->debug.use_blitter) { + i915->base.resource_copy_region = i915_surface_copy_blitter; + i915->base.clear_render_target = i915_clear_render_target_blitter; + i915->base.clear_depth_stencil = i915_clear_depth_stencil_blitter; + } else { + i915->base.resource_copy_region = i915_surface_copy_render; + i915->base.clear_render_target = i915_clear_render_target_render; + i915->base.clear_depth_stencil = i915_clear_depth_stencil_render; + } i915->base.create_surface = i915_create_surface; i915->base.surface_destroy = i915_surface_destroy; } diff --git a/src/gallium/drivers/i915/i915_winsys.h 
b/src/gallium/drivers/i915/i915_winsys.h index 24ea416f01..21cfdc9613 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -76,7 +76,6 @@ struct i915_winsys_batchbuffer { size_t size; size_t relocs; - size_t max_relocs; /*@}*/ }; @@ -95,6 +94,18 @@ struct i915_winsys { (*batchbuffer_create)(struct i915_winsys *iws); /** + * Validate buffers for usage in this batchbuffer. + * Does space-checking and asorted other book-keeping. + * + * @batch + * @buffers array to buffers to validate + * @num_of_buffers size of the passed array + */ + boolean (*validate_buffers)(struct i915_winsys_batchbuffer *batch, + struct i915_winsys_buffer **buffers, + int num_of_buffers); + + /** * Emit a relocation to a buffer. * Target position in batchbuffer is the same as ptr. * @@ -103,11 +114,12 @@ struct i915_winsys { * @usage how is the hardware going to use the buffer. * @offset add this to the reloc buffers address * @target buffer where to write the address, null for batchbuffer. + * @fenced relocation needs a fence. */ int (*batchbuffer_reloc)(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *reloc, enum i915_winsys_buffer_usage usage, - unsigned offset, bool fenced); + unsigned offset, boolean fenced); /** * Flush a bufferbatch. 
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index e80067f3b1..3c93579246 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -64,13 +64,11 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch) } struct brw_batchbuffer * -brw_batchbuffer_alloc(struct brw_winsys_screen *sws, - struct brw_chipset chipset) +brw_batchbuffer_alloc(struct brw_winsys_screen *sws) { struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer); batch->sws = sws; - batch->chipset = chipset; brw_batchbuffer_reset(batch); return batch; diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 6ca9f617f5..6ecb91857d 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -26,7 +26,6 @@ struct brw_batchbuffer { struct brw_winsys_screen *sws; struct brw_winsys_buffer *buf; - struct brw_chipset chipset; /** * Values exported to speed up the writing the batchbuffer, @@ -47,8 +46,8 @@ struct brw_batchbuffer { /*@}*/ }; -struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws, - struct brw_chipset chipset ); +struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws ); + void brw_batchbuffer_free(struct brw_batchbuffer *batch); diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index ccba205e8c..66b13ea58e 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -66,16 +66,14 @@ compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; - c.chipset = brw->chipset; c.key = *key; - c.need_ff_sync = c.chipset.is_igdng; /* Need to locate the two positions present in vertex + header. 
* These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; - if (c.chipset.is_igdng) + if (brw->gen == 5) delta = 3 * REG_SIZE; else delta = REG_SIZE; @@ -97,7 +95,7 @@ compile_clip_prog( struct brw_context *brw, if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT) c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 80e3a11a37..f123b73c06 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -125,12 +125,10 @@ struct brw_clip_compile { GLuint last_tmp; GLboolean need_direction; - struct brw_chipset chipset; GLuint last_mrf; GLuint header_position_offset; - GLboolean need_ff_sync; GLuint nr_color_attrs; GLuint offset_color0; diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index 66caadc4d5..4ed7362171 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -32,6 +32,7 @@ #include "util/u_debug.h" #include "brw_defines.h" +#include "brw_context.h" #include "brw_eu.h" #include "brw_clip.h" @@ -41,7 +42,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) { GLuint i = 0,j; - + struct brw_context *brw = c->func.brw; /* Register usage is static, precompute here: */ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; @@ -79,7 +80,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) i++; } - if (c->need_ff_sync) { + if (brw->needs_ff_sync) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -120,6 +121,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) static void clip_and_emit_line( struct brw_clip_compile 
*c ) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_indirect vtx0 = brw_indirect(0, 0); struct brw_indirect vtx1 = brw_indirect(1, 0); struct brw_indirect newvtx0 = brw_indirect(2, 0); @@ -146,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -183,7 +185,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); { @@ -208,7 +210,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* If both are positive, do nothing */ /* Only on GM965/G965 */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); } @@ -223,7 +225,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_ENDIF(p, is_neg2); } } diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 5c3ccfd8d0..f56edf3177 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -109,7 +109,7 @@ clip_unit_create_from_key(struct brw_context *brw, /* Although up to 16 concurrent Clip threads are allowed on IGDNG, * only 2 threads can output VUEs at a time. 
*/ - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) clip.thread4.max_threads = 16 - 1; else clip.thread4.max_threads = 2 - 1; @@ -134,7 +134,7 @@ clip_unit_create_from_key(struct brw_context *brw, clip.clip5.api_mode = BRW_CLIP_API_OGL; clip.clip5.clip_mode = key->clip_mode; - if (BRW_IS_G4X(brw)) + if (brw->is_g4x) clip.clip5.negative_w_clip_test = 1; clip.clip6.clipper_viewport_state_ptr = 0; diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 069524bc14..7d400e6028 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -30,6 +30,7 @@ */ #include "brw_defines.h" +#include "brw_context.h" #include "brw_eu.h" #include "brw_clip.h" @@ -43,6 +44,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, GLuint nr_verts ) { GLuint i = 0,j; + struct brw_context *brw = c->func.brw; /* Register usage is static, precompute here: */ @@ -69,7 +71,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, for (j = 0; j < 3; j++) { GLuint delta = c->key.nr_attrs*16 + 32; - if (c->chipset.is_igdng) + if (brw->gen == 5) delta = c->key.nr_attrs * 16 + 32 * 3; brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); @@ -110,7 +112,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, i++; } - if (c->need_ff_sync) { + if (brw->needs_ff_sync) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -563,7 +565,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) /* if -ve rhw workaround bit is set, do cliptest */ - if (c->chipset.is_965) { + if (p->brw->has_negative_rhw_bug) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 23e51ee9bc..5713f25da7 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -31,6 
+31,7 @@ #include "brw_defines.h" +#include "brw_context.h" #include "brw_eu.h" #include "brw_clip.h" @@ -126,6 +127,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, GLboolean force_edgeflag) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_reg tmp = get_tmp(c); GLuint i; @@ -142,7 +144,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, for (i = 0; i < c->key.nr_attrs; i++) { GLuint delta = i*16 + 32; - if (c->chipset.is_igdng) + if (brw->gen == 5) delta = i * 16 + 32 * 3; if (delta == c->offset_edgeflag) { @@ -176,7 +178,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, if (i & 1) { GLuint delta = i*16 + 32; - if (c->chipset.is_igdng) + if (brw->gen == 5) delta = i * 16 + 32 * 3; brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); @@ -350,7 +352,8 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) void brw_clip_ff_sync(struct brw_clip_compile *c) { - if (c->need_ff_sync) { + struct brw_context *brw = c->func.brw; + if (brw->needs_ff_sync) { struct brw_compile *p = &c->func; struct brw_instruction *need_ff_sync; @@ -379,7 +382,8 @@ void brw_clip_ff_sync(struct brw_clip_compile *c) void brw_clip_init_ff_sync(struct brw_clip_compile *c) { - if (c->need_ff_sync) { + struct brw_context *brw = c->func.brw; + if (brw->needs_ff_sync) { struct brw_compile *p = &c->func; brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index a2736f783d..41a468a32f 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -107,7 +107,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen, void *priv) { struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); - + struct brw_screen *brs = brw_screen(screen); if (!brw) { debug_printf("%s: failed to alloc context\n", __FUNCTION__); return NULL; @@ -117,7 +117,10 @@ struct pipe_context 
*brw_create_context(struct pipe_screen *screen, brw->base.priv = priv; brw->base.destroy = brw_destroy_context; brw->sws = brw_screen(screen)->sws; - brw->chipset = brw_screen(screen)->chipset; + brw->is_g4x = brs->is_g4x; + brw->needs_ff_sync = brs->needs_ff_sync; + brw->has_negative_rhw_bug = brs->has_negative_rhw_bug; + brw->gen = brs->gen; brw_init_resource_functions( brw ); brw_pipe_blend_init( brw ); @@ -145,7 +148,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen, make_empty_list(&brw->query.active_head); - brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset ); + brw->batch = brw_batchbuffer_alloc( brw->sws ); if (brw->batch == NULL) goto fail; diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index d927f382d5..45fc26dd7d 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -529,7 +529,14 @@ struct brw_query_object { struct brw_context { struct pipe_context base; - struct brw_chipset chipset; + int gen; + boolean has_negative_rhw_bug; + boolean needs_ff_sync; + boolean is_g4x; + + int urb_size; + int vs_max_threads; + int wm_max_threads; struct brw_winsys_screen *sws; @@ -854,11 +861,5 @@ brw_context( struct pipe_context *ctx ) return (struct brw_context *)ctx; } - -#define BRW_IS_965(brw) ((brw)->chipset.is_965) -#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng) -#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x) - - #endif diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index e201ce4d7c..7547eae97c 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -463,6 +463,13 @@ #define BRW_COMPRESSION_2NDHALF 1 #define BRW_COMPRESSION_COMPRESSED 2 +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + #define BRW_CONDITIONAL_NONE 
0 #define BRW_CONDITIONAL_Z 1 #define BRW_CONDITIONAL_NZ 2 @@ -502,6 +509,27 @@ #define BRW_MASK_ENABLE 0 #define BRW_MASK_DISABLE 1 +/** @{ + * + * Gen6 has replaced "mask enable/disable" with WECtrl, which is + * effectively the same but much simpler to think about. Now, there + * are two contributors ANDed together to whether channels are + * executed: The predication on the instruction, and the channel write + * enable. + */ +/** + * This is the default value. It means that a channel's write enable is set + * if the per-channel IP is pointing at this instruction. + */ +#define BRW_WE_NORMAL 0 +/** + * This is used like BRW_MASK_DISABLE, and causes all channels to have + * their write enable set. Note that predication still contributes to + * whether the channel actually gets written. + */ +#define BRW_WE_ALL 1 +/** @} */ + #define BRW_OPCODE_MOV 1 #define BRW_OPCODE_SEL 2 #define BRW_OPCODE_NOT 4 @@ -531,6 +559,8 @@ #define BRW_OPCODE_POP 47 #define BRW_OPCODE_WAIT 48 #define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_SENDC 50 +#define BRW_OPCODE_MATH 56 #define BRW_OPCODE_ADD 64 #define BRW_OPCODE_MUL 65 #define BRW_OPCODE_AVG 66 @@ -550,6 +580,7 @@ #define BRW_OPCODE_DP2 87 #define BRW_OPCODE_DPA2 88 #define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_PLN 90 #define BRW_OPCODE_NOP 126 #define BRW_PREDICATE_NONE 0 @@ -599,6 +630,8 @@ #define BRW_ARF_NOTIFICATION_COUNT 0x90 #define BRW_ARF_IP 0xA0 +#define BRW_MRF_COMPR4 (1 << 7) + #define BRW_AMASK 0 #define BRW_IMASK 1 #define BRW_LMASK 2 @@ -645,13 +678,14 @@ #define BRW_POLYGON_FACING_BACK 1 #define BRW_MESSAGE_TARGET_NULL 0 -#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */ #define BRW_MESSAGE_TARGET_SAMPLER 2 #define BRW_MESSAGE_TARGET_GATEWAY 3 -#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 -#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 /* sampler cache on GEN6 */ +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 /* render cache on Gen6 
*/ #define BRW_MESSAGE_TARGET_URB 6 #define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 +#define BRW_MESSAGE_TARGET_CONST_CACHE 9 /* GEN6 */ #define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 #define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 @@ -674,20 +708,15 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 - -/* for IGDNG only */ +#define BRW_SAMPLER_MESSAGE_SAMPLE_GEN5 0 +#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5 1 +#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5 2 +#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5 3 +#define BRW_SAMPLER_MESSAGE_SAMPLE_DERIVS_GEN5 4 +#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5 5 +#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5 6 + +/* for GEN5 only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 #define BRW_SAMPLER_SIMD_MODE_SIMD8 1 #define BRW_SAMPLER_SIMD_MODE_SIMD16 2 @@ -705,10 +734,24 @@ #define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 #define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 +/* This one stays the same across generations. 
*/ #define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +/* GEN4 */ #define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 -#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2 #define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 +/* G45, GEN5 */ +#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3 +#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 +/* GEN6 */ +#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5 +#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 #define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 #define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 @@ -728,6 +771,16 @@ #define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 #define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 +/* GEN6 */ +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE_GEN6 7 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE_GEN6 8 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE_GEN6 9 +#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE_GEN6 10 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORLD_SCATTERED_WRITE_GEN6 11 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6 12 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE_GEN6 13 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE_GEN6 14 + #define BRW_MATH_FUNCTION_INV 1 #define BRW_MATH_FUNCTION_LOG 2 #define BRW_MATH_FUNCTION_EXP 3 @@ -736,7 +789,8 @@ #define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ #define BRW_MATH_FUNCTION_COS 7 /* was 8 */ #define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ -#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define 
BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ #define BRW_MATH_FUNCTION_POW 10 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 @@ -787,17 +841,33 @@ #define CMD_PIPELINED_STATE_POINTERS 0x7800 #define CMD_BINDING_TABLE_PTRS 0x7801 +# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) +# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) + +#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ +# define PS_SAMPLER_STATE_CHANGE (1 << 12) +# define GS_SAMPLER_STATE_CHANGE (1 << 9) +# define VS_SAMPLER_STATE_CHANGE (1 << 8) +/* DW1: VS */ +/* DW2: GS */ +/* DW3: PS */ #define CMD_VERTEX_BUFFER 0x7808 # define BRW_VB0_INDEX_SHIFT 27 +# define GEN6_VB0_INDEX_SHIFT 26 # define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) # define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) +# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) # define BRW_VB0_PITCH_SHIFT 0 #define CMD_VERTEX_ELEMENT 0x7809 # define BRW_VE0_INDEX_SHIFT 27 +# define GEN6_VE0_INDEX_SHIFT 26 # define BRW_VE0_FORMAT_SHIFT 16 # define BRW_VE0_VALID (1 << 26) +# define GEN6_VE0_VALID (1 << 25) # define BRW_VE0_SRC_OFFSET_SHIFT 0 # define BRW_VE1_COMPONENT_NOSTORE 0 # define BRW_VE1_COMPONENT_STORE_SRC 1 @@ -816,6 +886,236 @@ #define CMD_INDEX_BUFFER 0x780a #define CMD_VF_STATISTICS_965 0x780b #define CMD_VF_STATISTICS_GM45 0x680b +#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */ + +#define CMD_URB 0x7805 /* GEN6+ */ +# define GEN6_URB_VS_SIZE_SHIFT 16 +# define GEN6_URB_VS_ENTRIES_SHIFT 0 +# define GEN6_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_URB_GS_SIZE_SHIFT 0 + +#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) +# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) +# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) + +#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ + +#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */ +/* DW2 */ +# define GEN6_VS_SPF_MODE (1 << 
31) +# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW4 */ +# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 +# define GEN6_VS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW5 */ +# define GEN6_VS_MAX_THREADS_SHIFT 25 +# define GEN6_VS_STATISTICS_ENABLE (1 << 10) +# define GEN6_VS_CACHE_DISABLE (1 << 1) +# define GEN6_VS_ENABLE (1 << 0) + +#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */ +/* DW2 */ +# define GEN6_GS_SPF_MODE (1 << 31) +# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_GS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 +/* DW5 */ +# define GEN6_GS_MAX_THREADS_SHIFT 25 +# define GEN6_GS_STATISTICS_ENABLE (1 << 10) +# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) +# define GEN6_GS_RENDERING_ENABLE (1 << 8) +/* DW6 */ +# define GEN6_GS_ENABLE (1 << 15) + +#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ +/* DW1 */ +# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/** + * Just does cheap culling based on the clip distance. Bits must be + * disjoint with USER_CLIP_CLIP_DISTANCE bits. + */ +# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0 +/* DW2 */ +# define GEN6_CLIP_ENABLE (1 << 31) +# define GEN6_CLIP_API_OGL (0 << 30) +# define GEN6_CLIP_API_D3D (1 << 30) +# define GEN6_CLIP_XY_TEST (1 << 28) +# define GEN6_CLIP_Z_TEST (1 << 27) +# define GEN6_CLIP_GB_TEST (1 << 26) +/** 8-bit field of which user clip distances to clip against. 
*/ +# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 +# define GEN6_CLIP_MODE_NORMAL (0 << 13) +# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) +# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) +# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) +# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8) +# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 +# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 +# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 +/* DW3 */ +# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 +# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 +# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) + +#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */ +/* DW1 */ +# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) +# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) +# define GEN6_SF_STATISTICS_ENABLE (1 << 10) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) +# define GEN6_SF_FRONT_SOLID (0 << 5) +# define GEN6_SF_FRONT_WIREFRAME (1 << 5) +# define GEN6_SF_FRONT_POINT (2 << 5) +# define GEN6_SF_BACK_SOLID (0 << 3) +# define GEN6_SF_BACK_WIREFRAME (1 << 3) +# define GEN6_SF_BACK_POINT (2 << 3) +# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) +# define GEN6_SF_WINDING_CCW (1 << 0) +/* DW3 */ +# define GEN6_SF_LINE_AA_ENABLE (1 << 31) +# define GEN6_SF_CULL_BOTH (0 << 29) +# define GEN6_SF_CULL_NONE (1 << 29) +# define GEN6_SF_CULL_FRONT (2 << 29) +# define GEN6_SF_CULL_BACK (3 << 29) +# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ +# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) +# define GEN6_SF_SCISSOR_ENABLE (1 << 11) +# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 
8) +# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) +# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) +# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) +/* DW4 */ +# define GEN6_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 +# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) +# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) +# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) +# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) +# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) +# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ +/* DW5: depth offset constant */ +/* DW6: depth offset scale */ +/* DW7: depth offset clamp */ +/* DW8 */ +# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) +# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) +# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) +# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) +# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 +# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 +# define ATTRIBUTE_1_SOURCE_SHIFT 16 +# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) +# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) +# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) +# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) +# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 +# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 +# define ATTRIBUTE_0_SOURCE_SHIFT 0 + +# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 +# define ATTRIBUTE_SWIZZLE_SHIFT 6 + +/* DW16: Point sprite texture coordinate enables */ +/* DW17: Constant interpolation enables */ +/* DW18: attr 0-7 wrap shortest enables */ +/* DW19: attr 8-16 wrap shortest enables */ + +#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN6_WM_SPF_MODE (1 << 31) +# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define 
GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW3: scratch space */ +/* DW4 */ +# define GEN6_WM_STATISTICS_ENABLE (1 << 31) +# define GEN6_WM_DEPTH_CLEAR (1 << 30) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 +/* DW5 */ +# define GEN6_WM_MAX_THREADS_SHIFT 25 +# define GEN6_WM_KILL_ENABLE (1 << 22) +# define GEN6_WM_COMPUTED_DEPTH (1 << 21) +# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN6_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) +# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) +# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) +# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) +# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) +# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11) +# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN6_WM_USES_SOURCE_W (1 << 8) +# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) +# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_WM_POSOFFSET_NONE (0 << 18) +# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) +# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) +# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16) +# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) +# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) +# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define 
GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) +# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) +# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) +# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) +# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) +# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) +# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) +/* DW7: kernel 1 pointer */ +/* DW8: kernel 2 pointer */ + +#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */ +#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */ +#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */ +# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) +# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) +# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) +# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) + +#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */ #define CMD_DRAW_RECT 0x7900 #define CMD_BLEND_CONSTANT_COLOR 0x7901 @@ -827,6 +1127,25 @@ #define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 #define CMD_AA_LINE_PARAMETERS 0x790a +#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */ +/* DW1 */ +# define SVB_INDEX_SHIFT 29 +# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ +/* DW2: SVB index */ +/* DW3: SVB maximum index */ + +#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */ +/* DW1 */ +# define MS_PIXEL_LOCATION_CENTER (0 << 4) +# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define MS_NUMSAMPLES_1 (0 << 1) +# define MS_NUMSAMPLES_4 (2 << 1) +# define MS_NUMSAMPLES_8 (3 << 1) + +#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */ +# define DEPTH_CLEAR_VALID (1 << 15) +/* DW1: depth clear value */ + #define CMD_PIPE_CONTROL 0x7a00 #define CMD_3D_PRIM 0x7b00 @@ -839,8 +1158,8 @@ #define R02_PRIM_END 0x1 #define R02_PRIM_START 0x2 -#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ - (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ +#define URB_SIZES(brw) (brw->gen == 5 ? 1024 : \ + (brw->is_g4x ? 
384 : 256)) /* 512 bit units */ diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 28c83515ba..b093569f0c 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -49,12 +49,14 @@ struct { [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, @@ -69,13 +71,14 @@ struct { [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 }, [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, - [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, 
[BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, @@ -144,7 +147,6 @@ char *chan_sel[4] = { }; char *dest_condmod[16] = { - [0] = NULL }; char *debug_ctrl[2] = { @@ -157,6 +159,16 @@ char *saturate[2] = { [1] = ".sat" }; +char *accwr[2] = { + [0] = "", + [1] = "AccWrEnable" +}; + +char *wectrl[2] = { + [0] = "WE_normal", + [1] = "WE_all" +}; + char *exec_size[8] = { [0] = "1", [1] = "2", @@ -204,6 +216,7 @@ char *compr_ctrl[4] = { [0] = "", [1] = "sechalf", [2] = "compr", + [3] = "compr4", }; char *dep_ctrl[4] = { @@ -233,6 +246,16 @@ char *reg_encoding[8] = { [7] = "F" }; +int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + char *imm_encoding[8] = { [0] = "UD", [1] = "D", @@ -321,6 +344,11 @@ char *math_precision[2] = { [1] = "partial_precision" }; +char *urb_opcode[2] = { + [0] = "urb_write", + [1] = "ff_sync", +}; + char *urb_swizzle[4] = { [BRW_URB_SWIZZLE_NONE] = "", [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", @@ -416,6 +444,11 @@ static int print_opcode (FILE *file, int id) static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) { int err = 0; + + /* Clear the Compr4 instruction compression bit. 
*/ + if (_reg_file == BRW_MESSAGE_REGISTER_FILE) + _reg_nr &= ~(1 << 7); + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { switch (_reg_nr & 0xf0) { case BRW_ARF_NULL: @@ -427,6 +460,9 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) case BRW_ARF_ACCUMULATOR: format (file, "acc%d", _reg_nr & 0x0f); break; + case BRW_ARF_FLAG: + format (file, "f%d", _reg_nr & 0x0f); + break; case BRW_ARF_MASK: format (file, "mask%d", _reg_nr & 0x0f); break; @@ -457,7 +493,7 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) return err; } -static int dest (FILE *file, const struct brw_instruction *inst) +static int dest (FILE *file, struct brw_instruction *inst) { int err = 0; @@ -469,7 +505,8 @@ static int dest (FILE *file, const struct brw_instruction *inst) if (err == -1) return 0; if (inst->bits1.da1.dest_subreg_nr) - format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, ".%d", inst->bits1.da1.dest_subreg_nr / + reg_type_size[inst->bits1.da1.dest_reg_type]); format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); } @@ -477,7 +514,8 @@ static int dest (FILE *file, const struct brw_instruction *inst) { string (file, "g[a0"); if (inst->bits1.ia1.dest_subreg_nr) - format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr / + reg_type_size[inst->bits1.ia1.dest_reg_type]); if (inst->bits1.ia1.dest_indirect_offset) format (file, " %d", inst->bits1.ia1.dest_indirect_offset); string (file, "]"); @@ -493,7 +531,8 @@ static int dest (FILE *file, const struct brw_instruction *inst) if (err == -1) return 0; if (inst->bits1.da16.dest_subreg_nr) - format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + format (file, ".%d", inst->bits1.da16.dest_subreg_nr / + reg_type_size[inst->bits1.da16.dest_reg_type]); string (file, "<1>"); err |= control (file, "writemask", writemask, 
inst->bits1.da16.dest_writemask, NULL); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); @@ -534,7 +573,7 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, if (err == -1) return 0; if (sub_reg_num) - format (file, ".%d", sub_reg_num); + format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */ src_align1_region (file, _vert_stride, _width, _horiz_stride); err |= control (file, "src reg encoding", reg_encoding, type, NULL); return err; @@ -588,11 +627,12 @@ static int src_da16 (FILE *file, if (err == -1) return 0; if (_subreg_nr) - format (file, ".%d", _subreg_nr); + /* bit4 for subreg number byte addressing. Make this same meaning as + in da1 case, so output looks consistent. */ + format (file, ".%d", 16 / reg_type_size[_reg_type]); string (file, "<"); err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); - string (file, ",1,1>"); - err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + string (file, ",4,1>"); /* * Three kinds of swizzle display: * identity - nothing printed @@ -619,11 +659,12 @@ static int src_da16 (FILE *file, err |= control (file, "channel select", chan_sel, swz_z, NULL); err |= control (file, "channel select", chan_sel, swz_w, NULL); } + err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); return err; } -static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) { +static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { switch (type) { case BRW_REGISTER_TYPE_UD: format (file, "0x%08xUD", inst->bits3.ud); @@ -652,7 +693,7 @@ static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) { return 0; } -static int src0 (FILE *file, const struct brw_instruction *inst) +static int src0 (FILE *file, struct brw_instruction *inst) { if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) return imm (file, inst->bits1.da1.src0_reg_type, @@ -712,7 
+753,7 @@ static int src0 (FILE *file, const struct brw_instruction *inst) } } -static int src1 (FILE *file, const struct brw_instruction *inst) +static int src1 (FILE *file, struct brw_instruction *inst) { if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) return imm (file, inst->bits1.da1.src1_reg_type, @@ -772,7 +813,7 @@ static int src1 (FILE *file, const struct brw_instruction *inst) } } -int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) +int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen) { int err = 0; int space = 0; @@ -822,7 +863,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) err |= src1 (file, inst); } - if (inst->header.opcode == BRW_OPCODE_SEND) { + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) { newline (file); pad (file, 16); space = 0; @@ -842,24 +884,70 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) inst->bits3.math.precision, &space); break; case BRW_MESSAGE_TARGET_SAMPLER: - format (file, " (%d, %d, ", - inst->bits3.sampler.binding_table_index, - inst->bits3.sampler.sampler); - err |= control (file, "sampler target format", sampler_target_format, - inst->bits3.sampler.return_format, NULL); - string (file, ")"); + if (gen >= 5) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen5.binding_table_index, + inst->bits3.sampler_gen5.sampler, + inst->bits3.sampler_gen5.msg_type, + inst->bits3.sampler_gen5.simd_mode); + } else if (0 /* FINISHME: is_g4x */) { + format (file, " (%d, %d)", + inst->bits3.sampler_g4x.binding_table_index, + inst->bits3.sampler_g4x.sampler); + } else { + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + } + break; + case BRW_MESSAGE_TARGET_DATAPORT_READ: + if (gen >= 6) { + format (file, " 
(%d, %d, %d, %d, %d, %d)", + inst->bits3.dp_render_cache.binding_table_index, + inst->bits3.dp_render_cache.msg_control, + inst->bits3.dp_render_cache.msg_type, + inst->bits3.dp_render_cache.send_commit_msg, + inst->bits3.dp_render_cache.msg_length, + inst->bits3.dp_render_cache.response_length); + } else if (gen >= 5 /* FINISHME: || is_g4x */) { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read_gen5.binding_table_index, + inst->bits3.dp_read_gen5.msg_control, + inst->bits3.dp_read_gen5.msg_type); + } else { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read.binding_table_index, + inst->bits3.dp_read.msg_control, + inst->bits3.dp_read.msg_type); + } break; case BRW_MESSAGE_TARGET_DATAPORT_WRITE: - format (file, " (%d, %d, %d, %d)", - inst->bits3.dp_write.binding_table_index, - (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | - inst->bits3.dp_write.msg_control, - inst->bits3.dp_write.msg_type, - inst->bits3.dp_write.send_commit_msg); + if (gen >= 6) { + format (file, " (%d, %d, %d, %d, %d, %d)", + inst->bits3.dp_render_cache.binding_table_index, + inst->bits3.dp_render_cache.msg_control, + inst->bits3.dp_render_cache.msg_type, + inst->bits3.dp_render_cache.send_commit_msg, + inst->bits3.dp_render_cache.msg_length, + inst->bits3.dp_render_cache.response_length); + } else { + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + } break; case BRW_MESSAGE_TARGET_URB: - format (file, " %d", inst->bits3.urb.offset); - space = 1; + if (gen >= 5) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } err |= control (file, "urb swizzle", urb_swizzle, inst->bits3.urb.swizzle_control, &space); err |= control (file, "urb allocate", urb_allocate, @@ -868,6 +956,11 @@ int brw_disasm_insn (FILE *file, const struct 
brw_instruction *inst) inst->bits3.urb.used, &space); err |= control (file, "urb complete", urb_complete, inst->bits3.urb.complete, &space); + if (gen >= 5) { + format (file, " mlen %d, rlen %d\n", + inst->bits3.urb_gen5.msg_length, + inst->bits3.urb_gen5.response_length); + } break; case BRW_MESSAGE_TARGET_THREAD_SPAWNER: break; @@ -877,10 +970,17 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) } if (space) string (file, " "); - format (file, "mlen %d", - inst->bits3.generic.msg_length); - format (file, " rlen %d", - inst->bits3.generic.response_length); + if (gen >= 5) { + format (file, "mlen %d", + inst->bits3.generic_gen5.msg_length); + format (file, " rlen %d", + inst->bits3.generic_gen5.response_length); + } else { + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } } pad (file, 64); if (inst->header.opcode != BRW_OPCODE_NOP) { @@ -891,7 +991,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); - if (inst->header.opcode == BRW_OPCODE_SEND) + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) err |= control (file, "end of thread", end_of_thread, inst->bits3.generic.end_of_thread, &space); if (space) @@ -905,13 +1006,13 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) int brw_disasm (FILE *file, - const struct brw_instruction *inst, - unsigned count) + struct brw_instruction *inst, + unsigned count, int gen) { int i, err; for (i = 0; i < count; i++) { - err = brw_disasm_insn(stderr, &inst[i]); + err = brw_disasm_insn(stderr, &inst[i], gen); if (err) return err; } diff --git 
a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h index ba5b109c48..ce451ed5a0 100644 --- a/src/gallium/drivers/i965/brw_disasm.h +++ b/src/gallium/drivers/i965/brw_disasm.h @@ -27,10 +27,10 @@ struct brw_instruction; -int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); +int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen); int brw_disasm (FILE *file, - const struct brw_instruction *inst, - unsigned count); + struct brw_instruction *inst, + unsigned count, int gen); #endif diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index ebeb1e146a..04ec5c81a6 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -89,13 +89,16 @@ static int brw_prepare_vertices(struct brw_context *brw) vb->buffer->width0 - vb->buffer_offset : MAX2(vb->buffer->width0 - vb->buffer_offset, vb->stride * (max_index + 1 - min_index))); + boolean flushed; - ret = u_upload_buffer( brw->vb.upload_vertex, + ret = u_upload_buffer( brw->vb.upload_vertex, + 0, vb->buffer_offset + min_index * vb->stride, size, vb->buffer, &offset, - &upload_buf ); + &upload_buf, + &flushed ); if (ret) return ret; @@ -167,7 +170,7 @@ static int brw_emit_vertex_buffers( struct brw_context *brw ) OUT_RELOC(brw->vb.vb[i].bo, BRW_USAGE_VERTEX, brw->vb.vb[i].offset); - if (BRW_IS_IGDNG(brw)) { + if (brw->gen == 5) { OUT_RELOC(brw->vb.vb[i].bo, BRW_USAGE_VERTEX, brw->vb.vb[i].bo->size - 1); @@ -251,13 +254,16 @@ static int brw_prepare_indices(struct brw_context *brw) /* Turn userbuffer into a proper hardware buffer? 
*/ if (brw_buffer_is_user_buffer(index_buffer)) { + boolean flushed; ret = u_upload_buffer( brw->vb.upload_index, + 0, index_offset, ib_size, index_buffer, &offset, - &upload_buf ); + &upload_buf, + &flushed ); if (ret) return ret; diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 00d8eaccbc..ba1159e4c3 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -255,19 +255,19 @@ static void brw_set_math_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.math_igdng.function = function; - insn->bits3.math_igdng.int_type = integer_type; - insn->bits3.math_igdng.precision = low_precision; - insn->bits3.math_igdng.saturate = saturate; - insn->bits3.math_igdng.data_type = dataType; - insn->bits3.math_igdng.snapshot = 0; - insn->bits3.math_igdng.header_present = 0; - insn->bits3.math_igdng.response_length = response_length; - insn->bits3.math_igdng.msg_length = msg_length; - insn->bits3.math_igdng.end_of_thread = 0; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; - insn->bits2.send_igdng.end_of_thread = 0; + if (brw->gen == 5) { + insn->bits3.math_gen5.function = function; + insn->bits3.math_gen5.int_type = integer_type; + insn->bits3.math_gen5.precision = low_precision; + insn->bits3.math_gen5.saturate = saturate; + insn->bits3.math_gen5.data_type = dataType; + insn->bits3.math_gen5.snapshot = 0; + insn->bits3.math_gen5.header_present = 0; + insn->bits3.math_gen5.response_length = response_length; + insn->bits3.math_gen5.msg_length = msg_length; + insn->bits3.math_gen5.end_of_thread = 0; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; + insn->bits2.send_gen5.end_of_thread = 0; } else { insn->bits3.math.function = function; insn->bits3.math.int_type = integer_type; @@ -295,18 +295,18 @@ static void brw_set_ff_sync_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - 
insn->bits3.urb_igdng.opcode = 1; - insn->bits3.urb_igdng.offset = offset; - insn->bits3.urb_igdng.swizzle_control = swizzle_control; - insn->bits3.urb_igdng.allocate = allocate; - insn->bits3.urb_igdng.used = used; - insn->bits3.urb_igdng.complete = complete; - insn->bits3.urb_igdng.header_present = 1; - insn->bits3.urb_igdng.response_length = response_length; - insn->bits3.urb_igdng.msg_length = msg_length; - insn->bits3.urb_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + insn->bits3.urb_gen5.opcode = 1; + insn->bits3.urb_gen5.offset = offset; + insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.allocate = allocate; + insn->bits3.urb_gen5.used = used; + insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.header_present = 1; + insn->bits3.urb_gen5.response_length = response_length; + insn->bits3.urb_gen5.msg_length = msg_length; + insn->bits3.urb_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } static void brw_set_urb_message( struct brw_context *brw, @@ -322,19 +322,19 @@ static void brw_set_urb_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.urb_igdng.opcode = 0; /* ? */ - insn->bits3.urb_igdng.offset = offset; - insn->bits3.urb_igdng.swizzle_control = swizzle_control; - insn->bits3.urb_igdng.allocate = allocate; - insn->bits3.urb_igdng.used = used; /* ? 
*/ - insn->bits3.urb_igdng.complete = complete; - insn->bits3.urb_igdng.header_present = 1; - insn->bits3.urb_igdng.response_length = response_length; - insn->bits3.urb_igdng.msg_length = msg_length; - insn->bits3.urb_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (brw->gen == 5) { + insn->bits3.urb_gen5.opcode = 0; /* ? */ + insn->bits3.urb_gen5.offset = offset; + insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.allocate = allocate; + insn->bits3.urb_gen5.used = used; /* ? */ + insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.header_present = 1; + insn->bits3.urb_gen5.response_length = response_length; + insn->bits3.urb_gen5.msg_length = msg_length; + insn->bits3.urb_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } else { insn->bits3.urb.opcode = 0; /* ? 
*/ insn->bits3.urb.offset = offset; @@ -361,18 +361,18 @@ static void brw_set_dp_write_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; - insn->bits3.dp_write_igdng.msg_control = msg_control; - insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; - insn->bits3.dp_write_igdng.msg_type = msg_type; - insn->bits3.dp_write_igdng.send_commit_msg = 0; - insn->bits3.dp_write_igdng.header_present = 1; - insn->bits3.dp_write_igdng.response_length = response_length; - insn->bits3.dp_write_igdng.msg_length = msg_length; - insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (brw->gen == 5) { + insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; + insn->bits3.dp_write_gen5.msg_control = msg_control; + insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write_gen5.msg_type = msg_type; + insn->bits3.dp_write_gen5.send_commit_msg = 0; + insn->bits3.dp_write_gen5.header_present = 1; + insn->bits3.dp_write_gen5.response_length = response_length; + insn->bits3.dp_write_gen5.msg_length = msg_length; + insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } else { insn->bits3.dp_write.binding_table_index = binding_table_index; insn->bits3.dp_write.msg_control = msg_control; @@ -398,18 +398,18 @@ static void brw_set_dp_read_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; - insn->bits3.dp_read_igdng.msg_control = msg_control; - insn->bits3.dp_read_igdng.msg_type = msg_type; - insn->bits3.dp_read_igdng.target_cache = 
target_cache; - insn->bits3.dp_read_igdng.header_present = 1; - insn->bits3.dp_read_igdng.response_length = response_length; - insn->bits3.dp_read_igdng.msg_length = msg_length; - insn->bits3.dp_read_igdng.pad1 = 0; - insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (brw->gen == 5) { + insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; + insn->bits3.dp_read_gen5.msg_control = msg_control; + insn->bits3.dp_read_gen5.msg_type = msg_type; + insn->bits3.dp_read_gen5.target_cache = target_cache; + insn->bits3.dp_read_gen5.header_present = 1; + insn->bits3.dp_read_gen5.response_length = response_length; + insn->bits3.dp_read_gen5.msg_length = msg_length; + insn->bits3.dp_read_gen5.pad1 = 0; + insn->bits3.dp_read_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } else { insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ @@ -437,18 +437,18 @@ static void brw_set_sampler_message(struct brw_context *brw, assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.sampler_igdng.binding_table_index = binding_table_index; - insn->bits3.sampler_igdng.sampler = sampler; - insn->bits3.sampler_igdng.msg_type = msg_type; - insn->bits3.sampler_igdng.simd_mode = simd_mode; - insn->bits3.sampler_igdng.header_present = header_present; - insn->bits3.sampler_igdng.response_length = response_length; - insn->bits3.sampler_igdng.msg_length = msg_length; - insn->bits3.sampler_igdng.end_of_thread = eot; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; - insn->bits2.send_igdng.end_of_thread = eot; - } else if (BRW_IS_G4X(brw)) { + if (brw->gen == 5) { + insn->bits3.sampler_gen5.binding_table_index = binding_table_index; + 
insn->bits3.sampler_gen5.sampler = sampler; + insn->bits3.sampler_gen5.msg_type = msg_type; + insn->bits3.sampler_gen5.simd_mode = simd_mode; + insn->bits3.sampler_gen5.header_present = header_present; + insn->bits3.sampler_gen5.response_length = response_length; + insn->bits3.sampler_gen5.msg_length = msg_length; + insn->bits3.sampler_gen5.end_of_thread = eot; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_gen5.end_of_thread = eot; + } else if (brw->is_g4x) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; insn->bits3.sampler_g4x.msg_type = msg_type; @@ -478,7 +478,7 @@ static struct brw_instruction *next_insn( struct brw_compile *p, if (0 && (BRW_DEBUG & DEBUG_DISASSEM)) { if (p->nr_insn) - brw_disasm_insn(stderr, &p->store[p->nr_insn-1]); + brw_disasm_insn(stderr, &p->store[p->nr_insn-1], p->brw->gen); } assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); @@ -658,7 +658,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) br = 2; if (p->single_program_flow) { @@ -699,7 +699,7 @@ void brw_ENDIF(struct brw_compile *p, { GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) br = 2; if (p->single_program_flow) { @@ -813,7 +813,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) br = 2; if (p->single_program_flow) @@ -856,7 +856,7 @@ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *landing = &p->store[p->nr_insn]; GLuint jmpi = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 06826635a8..2a8165b83e 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ 
-51,13 +51,13 @@ static enum pipe_error compile_gs_prog( struct brw_context *brw, memset(&c, 0, sizeof(c)); c.key = *key; - c.need_ff_sync = BRW_IS_IGDNG(brw); + c.need_ff_sync = brw->gen == 5; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = c.key.nr_attrs; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index b64ec286ce..6e070f6d75 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -103,7 +103,7 @@ gs_unit_create_from_key(struct brw_context *brw, else gs.thread4.max_threads = 0; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) gs.thread4.rendering_enable = 1; if (BRW_DEBUG & DEBUG_STATS) diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index 6d89b5d2ba..d53ce6ccfd 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -239,7 +239,7 @@ static int prepare_depthbuffer(struct brw_context *brw) static int emit_depthbuffer(struct brw_context *brw) { struct pipe_surface *surface = brw->curr.fb.zsbuf; - unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; + unsigned int len = (brw->is_g4x || brw->gen == 5) ? 
6 : 5; if (surface == NULL) { BEGIN_BATCH(len, IGNORE_CLIPRECTS); @@ -250,7 +250,7 @@ static int emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) OUT_BATCH(0); ADVANCE_BATCH(); @@ -298,7 +298,7 @@ static int emit_depthbuffer(struct brw_context *brw) ((surface->height - 1) << 19)); OUT_BATCH(0); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) OUT_BATCH(0); ADVANCE_BATCH(); @@ -363,10 +363,10 @@ const struct brw_tracked_state brw_line_stipple = { /*********************************************************************** - * Misc invarient state packets + * Misc invariant state packets */ -static int upload_invarient_state( struct brw_context *brw ) +static int upload_invariant_state( struct brw_context *brw ) { { /* 0x61040000 Pipeline Select */ @@ -374,7 +374,7 @@ static int upload_invarient_state( struct brw_context *brw ) struct brw_pipeline_select ps; memset(&ps, 0, sizeof(ps)); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) ps.header.opcode = CMD_PIPELINE_SELECT_GM45; else ps.header.opcode = CMD_PIPELINE_SELECT_965; @@ -413,7 +413,7 @@ static int upload_invarient_state( struct brw_context *brw ) struct brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) vfs.opcode = CMD_VF_STATISTICS_GM45; else vfs.opcode = CMD_VF_STATISTICS_965; @@ -424,7 +424,7 @@ static int upload_invarient_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &vfs); } - if (!BRW_IS_965(brw)) + if (!(brw->gen == 4)) { struct brw_aa_line_parameters balp; @@ -439,7 +439,7 @@ static int upload_invarient_state( struct brw_context *brw ) { struct brw_polygon_stipple_offset bpso; - /* This is invarient state in gallium: + /* This is invariant state in gallium: */ memset(&bpso, 0, sizeof(bpso)); bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; @@ -453,13 
+453,13 @@ static int upload_invarient_state( struct brw_context *brw ) return 0; } -const struct brw_tracked_state brw_invarient_state = { +const struct brw_tracked_state brw_invariant_state = { .dirty = { .mesa = 0, .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .emit = upload_invarient_state + .emit = upload_invariant_state }; @@ -480,7 +480,7 @@ static int upload_state_base_address( struct brw_context *brw ) /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. */ - if (BRW_IS_IGDNG(brw)) { + if (brw->gen == 5) { BEGIN_BATCH(8, IGNORE_CLIPRECTS); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index 0ae1a6be9e..3b4a99beed 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -38,7 +38,6 @@ void brw_context_flush( struct brw_context *brw ) static void brw_flush( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence ) { brw_context_flush( brw_context( pipe ) ); diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c index 4c1a6d7dcd..c86681d149 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.c +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -35,7 +35,7 @@ calculate_clip_key_rast( const struct brw_context *brw, { memset(key, 0, sizeof *key); - if (brw->chipset.is_igdng) + if (brw->gen == 5) key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP; else key->clip_mode = BRW_CLIPMODE_NORMAL; diff --git a/src/gallium/drivers/i965/brw_pipe_surface.c b/src/gallium/drivers/i965/brw_pipe_surface.c index 4deead98b1..58a610089e 100644 --- a/src/gallium/drivers/i965/brw_pipe_surface.c +++ b/src/gallium/drivers/i965/brw_pipe_surface.c @@ -169,20 +169,15 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, surface->ss.ss1.base_addr = 
surface->offset - tile_offset; - if (brw_screen->chipset.is_g4x) { - if (tex->tiling == BRW_TILING_X) { - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. - */ - surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4; - surface->ss.ss5.y_offset = tile_offset / 512 / 2; - } else { - surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4; + if (tex->tiling == BRW_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4; + surface->ss.ss5.y_offset = tile_offset / 512 / 2; + } else { + surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4; surface->ss.ss5.y_offset = tile_offset / 128 / 2; - } - } - else { - assert(tile_offset == 0); } } diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index 007239efc4..570ea23ff4 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -4,6 +4,7 @@ #include "util/u_memory.h" #include "util/u_format.h" +#include "util/u_transfer.h" static unsigned brw_translate_surface_format( unsigned id ) @@ -203,7 +204,7 @@ static void brw_translate_vertex_elements(struct brw_context *brw, brw_velems->ve[i].ve1.vfcomponent2 = comp2; brw_velems->ve[i].ve1.vfcomponent3 = comp3; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) brw_velems->ve[i].ve1.dst_offset = 0; else brw_velems->ve[i].ve1.dst_offset = i * 4; @@ -248,7 +249,6 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffers) { struct brw_context *brw = brw_context(pipe); - unsigned i; /* Check for no change */ if (count == brw->curr.num_vertex_buffers && @@ -257,18 +257,9 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe, count * sizeof buffers[0]) == 0) return; - /* Adjust refcounts */ - for (i = 0; i < count; i++) - 
pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer, - buffers[i].buffer); - - for ( ; i < brw->curr.num_vertex_buffers; i++) - pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer, - NULL); - - /* Copy remaining data */ - memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]); - brw->curr.num_vertex_buffers = count; + util_copy_vertex_buffers(brw->curr.vertex_buffer, + &brw->curr.num_vertex_buffers, + buffers, count); brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER; } @@ -312,15 +303,20 @@ brw_pipe_vertex_init( struct brw_context *brw ) brw->base.create_vertex_elements_state = brw_create_vertex_elements_state; brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state; brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state; + brw->base.redefine_user_buffer = u_default_redefine_user_buffer; } void brw_pipe_vertex_cleanup( struct brw_context *brw ) { + unsigned i; /* Release bound pipe vertex_buffers */ + for (i = 0; i < brw->curr.num_vertex_buffers; i++) { + pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer, NULL); + } /* Release some other stuff */ diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h index ba10f9d5df..53c7c43571 100644 --- a/src/gallium/drivers/i965/brw_reg.h +++ b/src/gallium/drivers/i965/brw_reg.h @@ -93,18 +93,54 @@ #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 #define PCI_CHIP_B43_G 0x2E42 +#define PCI_CHIP_B43_G1 0x2E92 #define PCI_CHIP_ILD_G 0x0042 #define PCI_CHIP_ILM_G 0x0046 -struct brw_chipset { - unsigned pci_id:16; - unsigned is_965:1; - unsigned is_igdng:1; - unsigned is_g4x:1; - unsigned pad:13; -}; - +#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */ +#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 +#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 +#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */ +#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 +#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 +#define PCI_CHIP_SANDYBRIDGE_S 0x010A 
/* Server */ + +#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ + devid == PCI_CHIP_Q45_G || \ + devid == PCI_CHIP_G45_G || \ + devid == PCI_CHIP_G41_G || \ + devid == PCI_CHIP_B43_G || \ + devid == PCI_CHIP_B43_G1) +#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) + +#define IS_GEN4(devid) (devid == PCI_CHIP_I965_G || \ + devid == PCI_CHIP_I965_Q || \ + devid == PCI_CHIP_I965_G_1 || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_I946_GZ || \ + IS_G4X(devid)) + +#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G) +#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G) +#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid)) + +#define IS_IRONLAKE(devid) IS_GEN5(devid) + +#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ + devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ + devid == PCI_CHIP_SANDYBRIDGE_S) + +#define IS_965(devid) (IS_GEN4(devid) || \ + IS_G4X(devid) || \ + IS_GEN5(devid) || \ + IS_GEN6(devid)) /* XXX: hacks */ diff --git a/src/gallium/drivers/i965/brw_resource_buffer.c b/src/gallium/drivers/i965/brw_resource_buffer.c index afb96ee3e7..32dc54f2b2 100644 --- a/src/gallium/drivers/i965/brw_resource_buffer.c +++ b/src/gallium/drivers/i965/brw_resource_buffer.c @@ -91,30 +91,10 @@ brw_buffer_transfer_unmap( struct pipe_context *pipe, } -static unsigned brw_buffer_is_referenced( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, - int layer) -{ - struct brw_context *brw = brw_context(pipe); - struct brw_winsys_buffer *batch_bo = brw->batch->buf; - struct brw_buffer *buf = brw_buffer(resource); - - if (buf->bo == NULL) - return PIPE_UNREFERENCED; - - if (!brw_screen(pipe->screen)->sws->bo_references( batch_bo, buf->bo )) - return PIPE_UNREFERENCED; - - 
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - - struct u_resource_vtbl brw_buffer_vtbl = { brw_buffer_get_handle, /* get_handle */ brw_buffer_destroy, /* resource_destroy */ - brw_buffer_is_referenced, /* is_resource_referenced */ u_default_get_transfer, /* get_transfer */ u_default_transfer_destroy, /* transfer_destroy */ brw_buffer_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c index fded2da382..71a1829024 100644 --- a/src/gallium/drivers/i965/brw_resource_texture.c +++ b/src/gallium/drivers/i965/brw_resource_texture.c @@ -225,48 +225,6 @@ static void brw_texture_destroy(struct pipe_screen *screen, } - - -static unsigned brw_texture_is_referenced( struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned level, - int layer ) -{ - struct brw_context *brw = brw_context(pipe); - struct brw_screen *bscreen = brw_screen(pipe->screen); - struct brw_winsys_buffer *batch_bo = brw->batch->buf; - struct brw_texture *tex = brw_texture(texture); - struct brw_surface *surf; - int i; - - /* XXX: this is subject to false positives if the underlying - * texture BO is referenced, we can't tell whether the sub-region - * we care about participates in that. 
- */ - if (bscreen->sws->bo_references( batch_bo, tex->bo )) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; - - /* Find any view on this texture for this level/layer and see if it - * is referenced: - */ - for (i = 0; i < 2; i++) { - foreach (surf, &tex->views[i]) { - if (surf->bo == tex->bo) - continue; - - if (!(layer == -1 || surf->id.bits.layer == layer) || - surf->id.bits.level != level) - continue; - - if (bscreen->sws->bo_references( batch_bo, surf->bo)) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; - } - } - - return PIPE_UNREFERENCED; -} - - /* * Transfer functions */ @@ -347,7 +305,6 @@ struct u_resource_vtbl brw_texture_vtbl = { brw_texture_get_handle, /* get_handle */ brw_texture_destroy, /* resource_destroy */ - brw_texture_is_referenced, /* is_resource_referenced */ brw_texture_get_transfer, /* get_transfer */ u_default_transfer_destroy, /* transfer_destroy */ brw_texture_transfer_map, /* transfer_map */ @@ -392,7 +349,7 @@ brw_texture_create( struct pipe_screen *screen, if (tex->compressed == 0 && !bscreen->no_tiling) { - if (bscreen->chipset.is_965 && + if (bscreen->gen < 5 && util_format_is_depth_or_stencil(template->format)) tex->tiling = BRW_TILING_Y; else diff --git a/src/gallium/drivers/i965/brw_resource_texture_layout.c b/src/gallium/drivers/i965/brw_resource_texture_layout.c index 2187bdd82c..afecc77e31 100644 --- a/src/gallium/drivers/i965/brw_resource_texture_layout.c +++ b/src/gallium/drivers/i965/brw_resource_texture_layout.c @@ -388,7 +388,7 @@ GLboolean brw_texture_layout(struct brw_screen *brw_screen, { switch (tex->b.b.target) { case PIPE_TEXTURE_CUBE: - if (brw_screen->chipset.is_igdng) + if (brw_screen->gen == 5) brw_layout_cubemap_idgng( tex ); else brw_layout_3d_cube( tex ); diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index f5b75b17e3..25204fd088 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -97,7 
+97,7 @@ brw_get_name(struct pipe_screen *screen) static char buffer[128]; const char *chipset; - switch (brw_screen(screen)->chipset.pci_id) { + switch (brw_screen(screen)->pci_id) { case PCI_CHIP_I965_G: chipset = "I965_G"; break; @@ -278,8 +278,7 @@ brw_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { static const enum pipe_format tex_supported[] = { PIPE_FORMAT_L8_UNORM, @@ -365,20 +364,19 @@ brw_fence_reference(struct pipe_screen *screen, { } -static int +static boolean brw_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { - return 0; /* XXX shouldn't this be a boolean? */ + return TRUE; } -static int +static boolean brw_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { - return 0; + return TRUE; } @@ -405,8 +403,6 @@ struct pipe_screen * brw_screen_create(struct brw_winsys_screen *sws) { struct brw_screen *bscreen; - struct brw_chipset chipset; - #ifdef DEBUG BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); @@ -415,46 +411,30 @@ brw_screen_create(struct brw_winsys_screen *sws) BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0); #endif - memset(&chipset, 0, sizeof chipset); - - chipset.pci_id = sws->pci_id; - - switch (chipset.pci_id) { - case PCI_CHIP_I965_G: - case PCI_CHIP_I965_Q: - case PCI_CHIP_I965_G_1: - case PCI_CHIP_I946_GZ: - case PCI_CHIP_I965_GM: - case PCI_CHIP_I965_GME: - chipset.is_965 = TRUE; - break; - - case PCI_CHIP_GM45_GM: - case PCI_CHIP_IGD_E_G: - case PCI_CHIP_Q45_G: - case PCI_CHIP_G45_G: - case PCI_CHIP_G41_G: - case PCI_CHIP_B43_G: - chipset.is_g4x = TRUE; - break; - - case PCI_CHIP_ILD_G: - case PCI_CHIP_ILM_G: - chipset.is_igdng = TRUE; - break; + 
bscreen = CALLOC_STRUCT(brw_screen); + if (!bscreen) + return NULL; - default: + bscreen->pci_id = sws->pci_id; + if (IS_GEN6(sws->pci_id)) { + bscreen->gen = 6; + bscreen->needs_ff_sync = TRUE; + } else if (IS_GEN5(sws->pci_id)) { + bscreen->gen = 5; + bscreen->needs_ff_sync = TRUE; + } else if (IS_965(sws->pci_id)) { + bscreen->gen = 4; + if (IS_G4X(sws->pci_id)) { + bscreen->is_g4x = true; + } + } else { debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", - __FUNCTION__, chipset.pci_id); + __FUNCTION__, sws->pci_id); + free(bscreen); return NULL; } - - bscreen = CALLOC_STRUCT(brw_screen); - if (!bscreen) - return NULL; - - bscreen->chipset = chipset; + sws->gen = bscreen->gen; bscreen->sws = sws; bscreen->base.winsys = NULL; bscreen->base.destroy = brw_destroy_screen; diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 58e293bc76..a62e1afc40 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -43,7 +43,11 @@ struct brw_winsys_screen; struct brw_screen { struct pipe_screen base; - struct brw_chipset chipset; + int gen; + boolean has_negative_rhw_bug; + boolean needs_ff_sync; + boolean is_g4x; + int pci_id; struct brw_winsys_screen *sws; boolean no_tiling; }; diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index 497634ec9e..901c334164 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -161,7 +161,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) jmpi = 2; brw_push_insn_state(p); @@ -205,7 +205,7 @@ static void do_flatshade_line( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) jmpi = 2; brw_push_insn_state(p); diff --git a/src/gallium/drivers/i965/brw_sf_state.c 
b/src/gallium/drivers/i965/brw_sf_state.c index 6c299a86b4..eec024650c 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -148,7 +148,7 @@ sf_unit_create_from_key(struct brw_context *brw, sf.thread3.dispatch_grf_start_reg = 3; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) sf.thread3.urb_entry_read_offset = 3; else sf.thread3.urb_entry_read_offset = 1; @@ -161,7 +161,7 @@ sf_unit_create_from_key(struct brw_context *brw, /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or * 48(IGDNG) threads */ - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) chipset_max_threads = 48; else chipset_max_threads = 24; diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index d2bbd0123d..380d511f9b 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -56,7 +56,7 @@ const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; const struct brw_tracked_state brw_curbe_buffer; const struct brw_tracked_state brw_curbe_offsets; -const struct brw_tracked_state brw_invarient_state; +const struct brw_tracked_state brw_invariant_state; const struct brw_tracked_state brw_gs_prog; const struct brw_tracked_state brw_gs_unit; const struct brw_tracked_state brw_line_stipple; diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index f8b91eff81..cdbf270e06 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -69,7 +69,7 @@ const struct brw_tracked_state *atoms[] = /* Command packets: */ - &brw_invarient_state, + &brw_invariant_state, &brw_state_base_address, &brw_binding_table_pointers, diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h index e97ddeb5e1..b0d75b4f82 100644 --- a/src/gallium/drivers/i965/brw_structs.h +++ b/src/gallium/drivers/i965/brw_structs.h @@ -279,7 +279,7 @@ 
struct brw_aa_line_parameters struct header header; struct { - GLuint aa_coverage_scope:8; + GLuint aa_coverage_slope:8; GLuint pad0:8; GLuint aa_coverage_bias:8; GLuint pad1:8; @@ -659,7 +659,105 @@ struct brw_clip_unit_state GLfloat viewport_ymax; }; +struct gen6_blend_state +{ + struct { + GLuint dest_blend_factor:5; + GLuint source_blend_factor:5; + GLuint pad3:1; + GLuint blend_func:3; + GLuint pad2:1; + GLuint ia_dest_blend_factor:5; + GLuint ia_source_blend_factor:5; + GLuint pad1:1; + GLuint ia_blend_func:3; + GLuint pad0:1; + GLuint ia_blend_enable:1; + GLuint blend_enable:1; + } blend0; + + struct { + GLuint post_blend_clamp_enable:1; + GLuint pre_blend_clamp_enable:1; + GLuint clamp_range:2; + GLuint pad0:4; + GLuint x_dither_offset:2; + GLuint y_dither_offset:2; + GLuint dither_enable:1; + GLuint alpha_test_func:3; + GLuint alpha_test_enable:1; + GLuint pad1:1; + GLuint logic_op_func:4; + GLuint logic_op_enable:1; + GLuint pad2:1; + GLuint write_disable_b:1; + GLuint write_disable_g:1; + GLuint write_disable_r:1; + GLuint write_disable_a:1; + GLuint pad3:1; + GLuint alpha_to_coverage_dither:1; + GLuint alpha_to_one:1; + GLuint alpha_to_coverage:1; + } blend1; +}; +struct gen6_color_calc_state +{ + struct { + GLuint alpha_test_format:1; + GLuint pad0:14; + GLuint round_disable:1; + GLuint bf_stencil_ref:8; + GLuint stencil_ref:8; + } cc0; + + union { + GLfloat alpha_ref_f; + struct { + GLuint ui:8; + GLuint pad0:24; + } alpha_ref_fi; + } cc1; + + GLfloat constant_r; + GLfloat constant_g; + GLfloat constant_b; + GLfloat constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + GLuint pad0:3; + GLuint bf_stencil_pass_depth_pass_op:3; + GLuint bf_stencil_pass_depth_fail_op:3; + GLuint bf_stencil_fail_op:3; + GLuint bf_stencil_func:3; + GLuint bf_stencil_enable:1; + GLuint pad1:2; + GLuint stencil_write_enable:1; + GLuint stencil_pass_depth_pass_op:3; + GLuint stencil_pass_depth_fail_op:3; + GLuint stencil_fail_op:3; + GLuint stencil_func:3; + 
GLuint stencil_enable:1; + } ds0; + + struct { + GLuint bf_stencil_write_mask:8; + GLuint bf_stencil_test_mask:8; + GLuint stencil_write_mask:8; + GLuint stencil_test_mask:8; + } ds1; + + struct { + GLuint pad0:26; + GLuint depth_write_enable:1; + GLuint depth_test_func:3; + GLuint pad1:1; + GLuint depth_test_enable:1; + } ds2; +}; struct brw_cc_unit_state { @@ -814,6 +912,13 @@ struct brw_sf_unit_state }; +struct gen6_scissor_rect +{ + GLuint xmin:16; + GLuint ymin:16; + GLuint xmax:16; + GLuint ymax:16; +}; struct brw_gs_unit_state { @@ -825,7 +930,7 @@ struct brw_gs_unit_state struct { GLuint pad0:8; - GLuint rendering_enable:1; /* for IGDNG */ + GLuint rendering_enable:1; /* for Ironlake */ GLuint pad4:1; GLuint stats_enable:1; GLuint nr_urb_entries:7; @@ -935,7 +1040,7 @@ struct brw_wm_unit_state GLfloat global_depth_offset_constant; GLfloat global_depth_offset_scale; - /* for IGDNG only */ + /* for Ironlake only */ struct { GLuint pad0:1; GLuint grf_reg_count_1:3; @@ -962,6 +1067,15 @@ struct brw_sampler_default_color { GLfloat color[4]; }; +struct gen5_sampler_default_color { + uint8_t ub[4]; + float f[4]; + uint16_t hf[4]; + uint16_t us[4]; + int16_t s[4]; + uint8_t b[4]; +}; + struct brw_sampler_state { @@ -973,7 +1087,7 @@ struct brw_sampler_state GLuint mag_filter:3; GLuint mip_filter:2; GLuint base_level:5; - GLuint pad:1; + GLuint min_mag_neq:1; GLuint lod_preclamp:1; GLuint default_color_mode:1; GLuint pad0:1; @@ -985,7 +1099,8 @@ struct brw_sampler_state GLuint r_wrap_mode:3; GLuint t_wrap_mode:3; GLuint s_wrap_mode:3; - GLuint pad:3; + GLuint cube_control_mode:1; + GLuint pad:2; GLuint max_lod:10; GLuint min_lod:10; } ss1; @@ -999,7 +1114,9 @@ struct brw_sampler_state struct brw_ss3 { - GLuint pad:19; + GLuint non_normalized_coord:1; + GLuint pad:12; + GLuint address_round:6; GLuint max_aniso:3; GLuint chroma_key_mode:1; GLuint chroma_key_index:2; @@ -1044,6 +1161,15 @@ struct brw_sf_viewport } scissor; }; +struct gen6_sf_viewport { + GLfloat m00; + 
GLfloat m11; + GLfloat m22; + GLfloat m30; + GLfloat m31; + GLfloat m32; +}; + /* Documented in the subsystem/shared-functions/sampler chapter... */ struct brw_surface_state @@ -1055,7 +1181,12 @@ struct brw_surface_state GLuint cube_neg_y:1; GLuint cube_pos_x:1; GLuint cube_neg_x:1; - GLuint pad:4; + GLuint pad:2; + /* Required on gen6 for surfaces accessed through render cache messages. + */ + GLuint render_cache_read_write:1; + /* Ironlake and newer: instead of replicating one of the texels */ + GLuint cube_corner_average:1; GLuint mipmap_layout_mode:1; GLuint vert_line_stride_ofs:1; GLuint vert_line_stride:1; @@ -1202,7 +1333,8 @@ struct brw_instruction GLuint predicate_inverse:1; GLuint execution_size:3; GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ - GLuint pad0:2; + GLuint acc_wr_control:1; + GLuint cmpt_control:1; GLuint debug_control:1; GLuint saturate:1; } header; @@ -1250,7 +1382,7 @@ struct brw_instruction GLuint dest_writemask:4; GLuint dest_subreg_nr:1; GLuint dest_reg_nr:8; - GLuint pad1:2; + GLuint dest_horiz_stride:2; GLuint dest_address_mode:1; } da16; @@ -1264,9 +1396,21 @@ struct brw_instruction GLuint dest_writemask:4; GLint dest_indirect_offset:6; GLuint dest_subreg_nr:3; - GLuint pad1:2; + GLuint dest_horiz_stride:2; GLuint dest_address_mode:1; } ia16; + + struct { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; + GLuint src1_reg_type:3; + GLuint pad:1; + + GLint jump_count:16; + } branch_gen6; } bits1; @@ -1339,7 +1483,7 @@ struct brw_instruction GLuint end_of_thread:1; GLuint pad1:1; GLuint sfid:4; - } send_igdng; /* for IGDNG only */ + } send_gen5; /* for Ironlake only */ } bits2; @@ -1413,6 +1557,21 @@ struct brw_instruction GLuint pad0:12; } if_else; + struct + { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. 
It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + struct { GLuint function:4; GLuint int_type:1; @@ -1440,7 +1599,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } math_igdng; + } math_gen5; struct { GLuint binding_table_index:8; @@ -1476,7 +1635,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } sampler_igdng; + } sampler_gen5; struct brw_urb_immediate urb; @@ -1494,7 +1653,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } urb_igdng; + } urb_gen5; struct { GLuint binding_table_index:8; @@ -1510,6 +1669,18 @@ struct brw_instruction struct { GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint msg_type:3; + GLuint target_cache:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_read_g4x; + + struct { + GLuint binding_table_index:8; GLuint msg_control:3; GLuint msg_type:3; GLuint target_cache:2; @@ -1519,7 +1690,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } dp_read_igdng; + } dp_read_gen5; struct { GLuint binding_table_index:8; @@ -1546,10 +1717,38 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } dp_write_igdng; + } dp_write_gen5; + + /* Sandybridge DP for sample cache, constant cache, render cache */ + struct { + GLuint binding_table_index:8; + GLuint msg_control:5; + GLuint msg_type:3; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_sampler_const_cache; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + 
GLuint slot_group_select:1; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:4; + GLuint send_commit_msg:1; + GLuint pad0:1; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_render_cache; struct { - GLuint pad:16; + GLuint function_control:16; GLuint response_length:4; GLuint msg_length:4; GLuint msg_target:4; @@ -1557,14 +1756,15 @@ struct brw_instruction GLuint end_of_thread:1; } generic; + /* Of this struct, only end_of_thread is not present for gen6. */ struct { - GLuint pad:19; + GLuint function_control:19; GLuint header_present:1; GLuint response_length:5; GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } generic_igdng; + } generic_gen5; GLint d; GLuint ud; diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c index cd40fc6d61..f3de2f995b 100644 --- a/src/gallium/drivers/i965/brw_structs_dump.c +++ b/src/gallium/drivers/i965/brw_structs_dump.c @@ -72,7 +72,7 @@ brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr) { debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); - debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope); + debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_slope); debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias); debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope); debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias); diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index 907ec56c6c..b630752809 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -147,7 +147,7 @@ static int recalculate_urb_fence( struct brw_context *brw ) brw->urb.constrained 
= 0; - if (BRW_IS_IGDNG(brw)) { + if (brw->gen == 5) { brw->urb.nr_vs_entries = 128; brw->urb.nr_sf_entries = 48; if (check_urb_layout(brw)) { @@ -157,7 +157,7 @@ static int recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; } - } else if (BRW_IS_G4X(brw)) { + } else if (brw->is_g4x) { brw->urb.nr_vs_entries = 64; if (check_urb_layout(brw)) { goto done; diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index 944d88c84c..b6d1091618 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -56,7 +56,6 @@ struct brw_vs_compile { struct brw_compile func; struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; - struct brw_chipset chipset; struct brw_vertex_shader *vp; diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 5dcbd597dd..559f0c61d8 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -116,6 +116,7 @@ static boolean find_output_slot( struct brw_vs_compile *c, */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { + struct brw_context *brw = c->func.brw; GLuint i, reg = 0, subreg = 0, mrf; int attributes_in_vue; @@ -218,7 +219,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ c->nr_outputs = c->prog_data.nr_outputs; - if (c->chipset.is_igdng) + if (brw->gen == 5) mrf = 8; else mrf = 4; @@ -333,7 +334,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); - if (c->chipset.is_igdng) + if (brw->gen == 5) c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; @@ -1124,6 +1125,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, static void emit_vertex_write( struct brw_vs_compile *c) { struct brw_compile *p = &c->func; 
+ struct brw_context *brw = p->brw; struct brw_reg m0 = brw_message_reg(0); struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; @@ -1143,7 +1145,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ if (c->prog_data.writes_psiz || c->key.nr_userclip || - c->chipset.is_965) + brw->has_negative_rhw_bug) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1174,7 +1176,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -1202,7 +1204,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, offset(m0, 2), ndc); - if (c->chipset.is_igdng) { + if (brw->gen == 5) { /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ /* m4, m5 contain the distances from vertex to the user clip planeXXX. 
@@ -1339,6 +1341,7 @@ static void emit_insn(struct brw_vs_compile *c, unsigned opcode = inst->Instruction.Opcode; unsigned label = inst->Label.Label; struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_reg args[3], dst; GLuint i; @@ -1514,7 +1517,7 @@ static void emit_insn(struct brw_vs_compile *c, c->loop_depth--; - if (c->chipset.is_igdng) + if (brw->gen == 5) br = 2; inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]); @@ -1652,6 +1655,6 @@ void brw_vs_emit(struct brw_vs_compile *c) if (BRW_DEBUG & DEBUG_VS) { debug_printf("vs-native:\n"); - brw_disasm(stderr, p->store, p->nr_insn); + brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen); } } diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index dadbb622e4..6d2ccfd6d9 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -100,7 +100,7 @@ vs_unit_create_from_key(struct brw_context *brw, */ vs.thread1.single_program_flow = 0; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ else vs.thread1.binding_table_entry_count = key->nr_surfaces; @@ -111,16 +111,16 @@ vs_unit_create_from_key(struct brw_context *brw, vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; else vs.thread4.nr_urb_entries = key->nr_urb_entries; vs.thread4.urb_entry_allocation_size = key->urb_size - 1; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) chipset_max_threads = 72; - else if (BRW_IS_G4X(brw)) + else if (brw->is_g4x) chipset_max_threads = 32; else chipset_max_threads = 16; diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index a06f8bb7d6..038f6f788a 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -148,7 +148,7 @@ 
static INLINE void make_reloc(struct brw_winsys_reloc *reloc, struct brw_winsys_screen { unsigned pci_id; - + int gen; /** * Buffer functions. */ @@ -282,7 +282,7 @@ void brw_dump_data( unsigned pci_id, enum brw_buffer_data_type data_type, unsigned offset, const void *data, - size_t size ); + size_t size, int gen ); #endif diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c index f8f6a539bc..b66b1cfccb 100644 --- a/src/gallium/drivers/i965/brw_winsys_debug.c +++ b/src/gallium/drivers/i965/brw_winsys_debug.c @@ -9,7 +9,7 @@ void brw_dump_data( unsigned pci_id, enum brw_buffer_data_type data_type, unsigned offset, const void *data, - size_t size ) + size_t size, int gen ) { if (BRW_DUMP & DUMP_ASM) { switch (data_type) { @@ -18,7 +18,7 @@ void brw_dump_data( unsigned pci_id, case BRW_DATA_GS_VS_PROG: case BRW_DATA_GS_GS_PROG: case BRW_DATA_GS_CLIP_PROG: - brw_disasm( stderr, data, size / sizeof(struct brw_instruction) ); + brw_disasm( stderr, (struct brw_instruction *)data, size / sizeof(struct brw_instruction), gen ); break; default: break; @@ -77,7 +77,7 @@ void brw_dump_data( unsigned pci_id, if (BRW_DUMP & DUMP_BATCH) { switch (data_type) { case BRW_DATA_BATCH_BUFFER: - intel_decode(data, size / 4, offset, pci_id); + intel_decode(data, size / 4, offset, pci_id, 0); break; default: break; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 8f983a60ae..6301062fd7 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -848,11 +848,11 @@ static void emit_tex( struct brw_wm_compile *c, responseLength = 8; /* always */ - if (BRW_IS_IGDNG(p->brw)) { + if (p->brw->gen == 5) { if (shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; } else { if (shadow) 
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; @@ -917,8 +917,8 @@ static void emit_txb( struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(8), coord[3]); msgLength = 9; - if (BRW_IS_IGDNG(p->brw)) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; + if (p->brw->gen == 5) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; @@ -1516,6 +1516,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) if (BRW_DEBUG & DEBUG_WM) { debug_printf("wm-native:\n"); - brw_disasm(stderr, p->store, p->nr_insn); + brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen); } } diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index f7ee55cc1c..a65e16edec 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -812,7 +812,7 @@ static void precalc_tex( struct brw_wm_compile *c, } /* XXX: add GL_EXT_texture_swizzle support to gallium -- by - * generating shader varients in mesa state tracker. + * generating shader variants in mesa state tracker. */ /* Release this temp if we ended up allocating it: diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index 3b3afc39d3..fb8e40d928 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -1607,7 +1607,7 @@ static void emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ - if (BRW_IS_IGDNG(p->brw)) { + if (p->brw->gen == 5) { msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; } else { /* Does it work well on SIMD8? 
*/ @@ -1688,7 +1688,7 @@ static void emit_tex(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } - if (BRW_IS_IGDNG(p->brw)) { + if (p->brw->gen == 5) { if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; else @@ -1970,7 +1970,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_ struct brw_instruction *inst0, *inst1; GLuint br = 1; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) br = 2; loop_depth--; diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index efc2d96be1..a690003ecb 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -70,9 +70,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->max_threads = 1; else { /* WM maximum threads is number of EUs times number of threads per EU. */ - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) key->max_threads = 12 * 6; - else if (BRW_IS_G4X(brw)) + else if (brw->is_g4x) key->max_threads = 10 * 5; else key->max_threads = 8 * 4; @@ -155,7 +155,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm.thread1.binding_table_entry_count = key->nr_surfaces; @@ -174,7 +174,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) wm.wm4.sampler_count = 0; /* hardware requirement */ else wm.wm4.sampler_count = (key->sampler_count + 1) / 4; @@ -277,7 +277,7 @@ static enum pipe_error upload_wm_unit( struct brw_context *brw ) grf_reg_count = (align(key.total_grf, 16) 
/ 16 - 1); per_thread_scratch_space = key.total_scratch / 1024 - 1; stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm; - sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4; + sampler_count = brw->gen == 5 ? 0 :(key.sampler_count + 1) / 4; /* Emit WM program relocation */ make_reloc(&reloc[nr_reloc++], diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c index 36c04a3165..1abe869f1a 100644 --- a/src/gallium/drivers/i965/intel_decode.c +++ b/src/gallium/drivers/i965/intel_decode.c @@ -42,10 +42,11 @@ #include "util/u_memory.h" #include "util/u_string.h" + #include "intel_decode.h" +#include "brw_reg.h" /*#include "intel_chipset.h"*/ -#define IS_965(x) 1 /* XXX */ #define IS_9XX(x) 1 /* XXX */ #define BUFFER_FAIL(_count, _len, _name) do { \ @@ -99,10 +100,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) } opcodes_mi[] = { { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" }, { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" }, + { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" }, { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" }, { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" }, { 0x04, 0, 1, 1, "MI_FLUSH" }, - { 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" }, + { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" }, { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" }, { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" }, { 0x00, 0, 1, 1, "MI_NOOP" }, @@ -116,6 +118,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" }, }; + switch ((data[0] & 0x1f800000) >> 23) { + case 0x0a: + instr_out(data, hw_offset, 0, "MI_BATCH_BUFFER_END\n"); + return -1; + } for (opcode = 0; opcode < Elements(opcodes_mi); opcode++) { if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { @@ -308,9 +315,13 @@ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) static int decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { - switch 
((data[0] & 0x00f80000) >> 19) { + uint32_t opcode; + + opcode = (data[0] & 0x00f80000) >> 19; + + switch (opcode) { case 0x11: - instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n"); + instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n"); return 1; case 0x10: instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n"); @@ -326,7 +337,8 @@ decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures) return 1; } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n", + opcode); (*failures)++; return 1; } @@ -384,7 +396,7 @@ i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask sprintf(dstname, "oD%s%s", dstmask, sat); break; case 6: - if (dst_nr > 2) + if (dst_nr > 3) fprintf(out, "bad destination reg U%d\n", dst_nr); sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat); break; @@ -455,7 +467,7 @@ i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name) break; case 6: sprintf(name, "U%d", src_nr); - if (src_nr > 2) + if (src_nr > 3) fprintf(out, "bad src reg %s\n", name); break; default: @@ -800,10 +812,14 @@ i915_decode_instruction(const uint32_t *data, uint32_t hw_offset, } static int -decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830) +decode_3d_1d(const uint32_t *data, int count, + uint32_t hw_offset, + uint32_t devid, + int *failures) { - unsigned int len, i, c, opcode, word, map, sampler, instr; + unsigned int len, i, c, idx, word, map, sampler, instr; char *format; + uint32_t opcode; struct { uint32_t opcode; @@ -814,7 +830,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, } opcodes_3d_1d[] = { { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" }, { 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" }, - { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" }, + { 0x9c, 0, 7, 7, "3DSTATE_CLEAR_PARAMETERS" }, { 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" }, { 
0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" }, { 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" }, @@ -822,7 +838,6 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, { 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" }, { 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" }, { 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" }, - { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" }, { 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" }, { 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" }, { 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" }, @@ -834,9 +849,11 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, { 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" }, { 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" }, { 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" }, - }; + }, *opcode_3d_1d; - switch ((data[0] & 0x00ff0000) >> 16) { + opcode = (data[0] & 0x00ff0000) >> 16; + + switch (opcode) { case 0x07: /* This instruction is unusual. A 0 length means just 1 DWORD instead of * 2. The 0 length is specified in one place to be unsupported, but @@ -891,26 +908,56 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); len = (data[0] & 0x0000000f) + 2; i = 1; - for (word = 0; word <= 7; word++) { + for (word = 0; word <= 8; word++) { if (data[0] & (1 << (4 + word))) { if (i >= count) BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1"); /* save vertex state for decode */ - if (word == 2) { - saved_s2_set = 1; - saved_s2 = data[i]; - } - if (word == 4) { - saved_s4_set = 1; - saved_s4 = data[i]; + if (IS_9XX(devid)) { + if (word == 2) { + saved_s2_set = 1; + saved_s2 = data[i]; + } + if (word == 4) { + saved_s4_set = 1; + saved_s4 = data[i]; + } } instr_out(data, hw_offset, i++, "S%d\n", word); } } if (len != i) { - fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n"); + fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + (*failures)++; + } + return len; + case 0x03: 
+ instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 6; word <= 14; word++) { + if (data[0] & (1 << word)) { + if (i >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_2"); + + if (word == 6) + instr_out(data, hw_offset, i++, "TBCF\n"); + else if (word >= 7 && word <= 10) { + instr_out(data, hw_offset, i++, "TB%dC\n", word - 7); + instr_out(data, hw_offset, i++, "TB%dA\n", word - 7); + } else if (word >= 11 && word <= 14) { + instr_out(data, hw_offset, i++, "TM%dS0\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS1\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS2\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS3\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS4\n", word - 11); + } + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); (*failures)++; } return len; @@ -922,11 +969,27 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, i = 2; for (map = 0; map <= 15; map++) { if (data[1] & (1 << map)) { + int width, height, pitch, dword; + const char *tiling; + if (i + 3 >= count) BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE"); instr_out(data, hw_offset, i++, "map %d MS2\n", map); - instr_out(data, hw_offset, i++, "map %d MS3\n", map); - instr_out(data, hw_offset, i++, "map %d MS4\n", map); + + dword = data[i]; + width = ((dword >> 10) & ((1 << 11) - 1))+1; + height = ((dword >> 21) & ((1 << 11) - 1))+1; + + tiling = "none"; + if (dword & (1 << 2)) + tiling = "fenced"; + else if (dword & (1 << 1)) + tiling = dword & (1 << 0) ? 
"Y" : "X"; + instr_out(data, hw_offset, i++, "map %d MS3 [width=%d, height=%d, tiling=%s]\n", map, width, height, tiling); + + dword = data[i]; + pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1); + instr_out(data, hw_offset, i++, "map %d MS4 [pitch=%d]\n", map, pitch); } } if (len != i) { @@ -982,8 +1045,8 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, } return len; case 0x01: - if (i830) - break; + if (!IS_9XX(devid)) + break; instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n"); instr_out(data, hw_offset, 1, "mask\n"); len = (data[0] & 0x0000003f) + 2; @@ -1034,30 +1097,61 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, format, (data[1] & (1 << 31)) ? "en" : "dis"); return len; + + case 0x8e: + { + const char *name, *tiling; + + len = (data[0] & 0x0000000f) + 2; + if (len != 3) + fprintf(out, "Bad count in 3DSTATE_BUFFER_INFO\n"); + if (count < 3) + BUFFER_FAIL(count, len, "3DSTATE_BUFFER_INFO"); + + switch((data[1] >> 24) & 0x7) { + case 0x3: name = "color"; break; + case 0x7: name = "depth"; break; + default: name = "unknown"; break; + } + + tiling = "none"; + if (data[1] & (1 << 23)) + tiling = "fenced"; + else if (data[1] & (1 << 22)) + tiling = data[1] & (1 << 21) ? 
"Y" : "X"; + + instr_out(data, hw_offset, 0, "3DSTATE_BUFFER_INFO\n"); + instr_out(data, hw_offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff); + + instr_out(data, hw_offset, 2, "address\n"); + return len; + } } - for (opcode = 0; opcode < Elements(opcodes_3d_1d); opcode++) { - if (opcodes_3d_1d[opcode].i830_only && !i830) + for (idx = 0; idx < Elements(opcodes_3d_1d); idx++) + { + opcode_3d_1d = &opcodes_3d_1d[idx]; + if (opcode_3d_1d->i830_only && IS_9XX(devid)) continue; - if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) { + if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) { len = 1; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name); - if (opcodes_3d_1d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d_1d->name); + if (opcode_3d_1d->max_len > 1) { len = (data[0] & 0x0000ffff) + 2; - if (len < opcodes_3d_1d[opcode].min_len || - len > opcodes_3d_1d[opcode].max_len) + if (len < opcode_3d_1d->min_len || + len > opcode_3d_1d->max_len) { fprintf(out, "Bad count in %s\n", - opcodes_3d_1d[opcode].name); + opcode_3d_1d->name); (*failures)++; } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d_1d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } @@ -1065,7 +1159,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode); (*failures)++; return 1; } @@ -1075,8 +1169,10 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { char immediate = (data[0] & (1 << 23)) == 0; - unsigned int len, i; + unsigned int len, i, ret; char *primtype; + int original_s2 = saved_s2; + int original_s4 = saved_s4; switch ((data[0] >> 18) & 0xf) { case 0x0: primtype = "TRILIST"; break; @@ -1089,7 +1185,7 @@ 
decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, case 0x7: primtype = "RECTLIST"; break; case 0x8: primtype = "POINTLIST"; break; case 0x9: primtype = "DIB"; break; - case 0xa: primtype = "CLEAR_RECT"; break; + case 0xa: primtype = "CLEAR_RECT"; saved_s4 = 3 << 6; saved_s2 = ~0; break; default: primtype = "unknown"; break; } @@ -1193,6 +1289,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, vertex++; } } + + ret = len; } else { /* indirect vertices */ len = data[0] & 0x0000ffff; /* index count */ @@ -1210,13 +1308,15 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, if ((data[i] & 0xffff) == 0xffff) { instr_out(data, hw_offset, i, " indices: (terminator)\n"); - return i; + ret = i; + goto out; } else if ((data[i] >> 16) == 0xffff) { instr_out(data, hw_offset, i, " indices: 0x%04x, " "(terminator)\n", data[i] & 0xffff); - return i; + ret = i; + goto out; } else { instr_out(data, hw_offset, i, " indices: 0x%04x, 0x%04x\n", @@ -1226,7 +1326,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, fprintf(out, "3DPRIMITIVE: no terminator found in index buffer\n"); (*failures)++; - return count; + ret = count; + goto out; } else { /* fixed size vertex index buffer */ for (i = 0; i < len; i += 2) { @@ -1241,7 +1342,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, } } } - return (len + 1) / 2 + 1; + ret = (len + 1) / 2 + 1; + goto out; } else { /* sequential vertex access */ if (count < 2) @@ -1250,17 +1352,22 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, "3DPRIMITIVE sequential indirect %s, %d starting from " "%d\n", primtype, len, data[1] & 0xffff); instr_out(data, hw_offset, 1, " start\n"); - return 2; + ret = 2; + goto out; } } - return len; +out: + saved_s2 = original_s2; + saved_s4 = original_s4; + return ret; } static int -decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) 
+decode_3d(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures) { - unsigned int opcode; + uint32_t opcode; + unsigned int idx; struct { uint32_t opcode; @@ -1277,41 +1384,44 @@ decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { 0x0d, 1, 1, "3DSTATE_MODES_4" }, { 0x0c, 1, 1, "3DSTATE_MODES_5" }, { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, - }; + }, *opcode_3d; + + opcode = (data[0] & 0x1f000000) >> 24; - switch ((data[0] & 0x1f000000) >> 24) { + switch (opcode) { case 0x1f: return decode_3d_primitive(data, count, hw_offset, failures); case 0x1d: - return decode_3d_1d(data, count, hw_offset, failures, 0); + return decode_3d_1d(data, count, hw_offset, devid, failures); case 0x1c: return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { - if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + for (idx = 0; idx < Elements(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if (opcode == opcode_3d->opcode) { unsigned int len = 1, i; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); - if (opcodes_3d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { len = (data[0] & 0xff) + 2; - if (len < opcodes_3d[opcode].min_len || - len > opcodes_3d[opcode].max_len) + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { - fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + fprintf(out, "Bad count in %s\n", opcode_3d->name); } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } return len; } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode); (*failures)++; return 1; } @@ -1401,12 +1511,87 @@ get_965_prim_type(uint32_t data) default: return 
"fail"; } } +static int +i965_decode_urb_fence(const uint32_t *data, uint32_t hw_offset, int len, int count, + int *failures) +{ + uint32_t vs_fence, clip_fence, gs_fence, sf_fence, vfe_fence, cs_fence; + + if (len != 3) + fprintf(out, "Bad count in URB_FENCE\n"); + if (count < 3) + BUFFER_FAIL(count, len, "URB_FENCE"); + + vs_fence = data[1] & 0x3ff; + gs_fence = (data[1] >> 10) & 0x3ff; + clip_fence = (data[1] >> 20) & 0x3ff; + sf_fence = data[2] & 0x3ff; + vfe_fence = (data[2] >> 10) & 0x3ff; + cs_fence = (data[2] >> 20) & 0x7ff; + + instr_out(data, hw_offset, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? "vs " : ""); + instr_out(data, hw_offset, 1, + "vs fence: %d, clip_fence: %d, gs_fence: %d\n", + vs_fence, clip_fence, gs_fence); + instr_out(data, hw_offset, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + sf_fence, vfe_fence, cs_fence); + if (gs_fence < vs_fence) + fprintf(out, "gs fence < vs fence!\n"); + if (clip_fence < gs_fence) + fprintf(out, "clip fence < gs fence!\n"); + if (sf_fence < clip_fence) + fprintf(out, "sf fence < clip fence!\n"); + if (cs_fence < sf_fence) + fprintf(out, "cs fence < sf fence!\n"); + + return len; +} + +static void +state_base_out(const uint32_t *data, uint32_t hw_offset, unsigned int index, + char *name) +{ + if (data[index] & 1) { + instr_out(data, hw_offset, index, "%s state base address 0x%08x\n", + name, data[index] & ~1); + } else { + instr_out(data, hw_offset, index, "%s state base not updated\n", + name); + } +} + +static void +state_max_out(const uint32_t *data, uint32_t hw_offset, unsigned int index, + char *name) +{ + if (data[index] & 1) { + if (data[index] == 1) { + instr_out(data, hw_offset, index, + "%s state upper bound disabled\n", name); + } else { + instr_out(data, hw_offset, index, "%s state upper bound 
0x%08x\n", + name, data[index] & ~1); + } + } else { + instr_out(data, hw_offset, index, "%s state upper bound not updated\n", + name); + } +} static int -decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures) { - unsigned int opcode, len; - int i; + uint32_t opcode; + unsigned int idx, len; + int i, sba_len; + char *desc1 = NULL; struct { uint32_t opcode; @@ -1435,51 +1620,78 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" }, { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, { 0x7b00, 6, 6, "3DPRIMITIVE" }, - }; + { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" }, + { 0x7810, 6, 6, "3DSTATE_VS_STATE" }, + { 0x7811, 7, 7, "3DSTATE_GS_STATE" }, + { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, + { 0x7813, 20, 20, "3DSTATE_SF_STATE" }, + { 0x7814, 9, 9, "3DSTATE_WM_STATE" }, + { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }, *opcode_3d; len = (data[0] & 0x0000ffff) + 2; - switch ((data[0] & 0xffff0000) >> 16) { + opcode = (data[0] & 0xffff0000) >> 16; + switch (opcode) { + case 0x6000: + len = (data[0] & 0x000000ff) + 2; + return i965_decode_urb_fence(data, hw_offset, len, count, failures); + case 0x6001: + instr_out(data, hw_offset, 0, "CS_URB_STATE\n"); + instr_out(data, hw_offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 
0x1f) + 1) * 64, + data[1] & 0x7); + return len; + case 0x6002: + len = (data[0] & 0x000000ff) + 2; + instr_out(data, hw_offset, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? "valid" : "invalid"); + instr_out(data, hw_offset, 1, "offset: 0x%08x, length: %d bytes\n", + data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64); + return len; case 0x6101: - if (len != 6) + if (IS_GEN6(devid)) + sba_len = 10; + else if (IS_IRONLAKE(devid)) + sba_len = 8; + else + sba_len = 6; + if (len != sba_len) fprintf(out, "Bad count in STATE_BASE_ADDRESS\n"); - if (count < 6) + if (len != sba_len) BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS"); + i = 0; instr_out(data, hw_offset, 0, "STATE_BASE_ADDRESS\n"); - - if (data[1] & 1) { - instr_out(data, hw_offset, 1, "General state at 0x%08x\n", - data[1] & ~1); - } else - instr_out(data, hw_offset, 1, "General state not updated\n"); - - if (data[2] & 1) { - instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n", - data[2] & ~1); - } else - instr_out(data, hw_offset, 2, "Surface state not updated\n"); - - if (data[3] & 1) { - instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n", - data[3] & ~1); - } else - instr_out(data, hw_offset, 3, "Indirect state not updated\n"); - - if (data[4] & 1) { - instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n", - data[4] & ~1); - } else - instr_out(data, hw_offset, 4, "General state not updated\n"); - - if (data[5] & 1) { - instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n", - data[5] & ~1); - } else - instr_out(data, hw_offset, 5, "Indirect state not updated\n"); + i++; + + state_base_out(data, hw_offset, i++, "general"); + state_base_out(data, hw_offset, i++, "surface"); + if (IS_GEN6(devid)) + state_base_out(data, hw_offset, i++, "dynamic"); + state_base_out(data, hw_offset, i++, "indirect"); + if (IS_IRONLAKE(devid) || IS_GEN6(devid)) + state_base_out(data, hw_offset, i++, "instruction"); + + state_max_out(data, hw_offset, i++, "general"); + if (IS_GEN6(devid)) + 
state_max_out(data, hw_offset, i++, "dynamic"); + state_max_out(data, hw_offset, i++, "indirect"); + if (IS_IRONLAKE(devid) || IS_GEN6(devid)) + state_max_out(data, hw_offset, i++, "instruction"); return len; case 0x7800: @@ -1498,18 +1710,33 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures instr_out(data, hw_offset, 6, "CC state\n"); return len; case 0x7801: - if (len != 6) + len = (data[0] & 0x000000ff) + 2; + if (len != 6 && len != 4) fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n"); - if (count < 6) - BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); + if (len == 6) { + if (count < 6) + BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); + instr_out(data, hw_offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + instr_out(data, hw_offset, 1, "VS binding table\n"); + instr_out(data, hw_offset, 2, "GS binding table\n"); + instr_out(data, hw_offset, 3, "Clip binding table\n"); + instr_out(data, hw_offset, 4, "SF binding table\n"); + instr_out(data, hw_offset, 5, "WM binding table\n"); + } else { + if (count < 4) + BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); - instr_out(data, hw_offset, 0, - "3DSTATE_BINDING_TABLE_POINTERS\n"); - instr_out(data, hw_offset, 1, "VS binding table\n"); - instr_out(data, hw_offset, 2, "GS binding table\n"); - instr_out(data, hw_offset, 3, "Clip binding table\n"); - instr_out(data, hw_offset, 4, "SF binding table\n"); - instr_out(data, hw_offset, 5, "WM binding table\n"); + instr_out(data, hw_offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, " + "GS mod %d, PS mod %d\n", + (data[0] & (1 << 8)) != 0, + (data[0] & (1 << 9)) != 0, + (data[0] & (1 << 10)) != 0); + instr_out(data, hw_offset, 1, "VS binding table\n"); + instr_out(data, hw_offset, 2, "GS binding table\n"); + instr_out(data, hw_offset, 3, "WM binding table\n"); + } return len; @@ -1560,6 +1787,18 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures } return len; + 
case 0x780d: + len = (data[0] & 0xff) + 2; + if (len != 4) + fprintf(out, "Bad count in 3DSTATE_VIEWPORT_STATE_POINTERS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VIEWPORT_STATE_POINTERS"); + instr_out(data, hw_offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n"); + instr_out(data, hw_offset, 1, "clip\n"); + instr_out(data, hw_offset, 2, "sf\n"); + instr_out(data, hw_offset, 3, "cc\n"); + return len; + case 0x780a: len = (data[0] & 0xff) + 2; if (len != 3) @@ -1592,7 +1831,7 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures return len; case 0x7905: - if (len != 5 && len != 6) + if (len < 5 || len > 7) fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n"); if (count < len) BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER"); @@ -1609,9 +1848,36 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures ((data[3] & 0x0007ffc0) >> 6) + 1, ((data[3] & 0xfff80000) >> 19) + 1); instr_out(data, hw_offset, 4, "volume depth\n"); - if (len == 6) + if (len >= 6) instr_out(data, hw_offset, 5, "\n"); + if (len >= 7) + instr_out(data, hw_offset, 6, "render target view extent\n"); + + return len; + case 0x7a00: + len = (data[0] & 0xff) + 2; + if (len != 4) + fprintf(out, "Bad count in PIPE_CONTROL\n"); + if (count < len) + BUFFER_FAIL(count, len, "PIPE_CONTROL"); + + switch ((data[0] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + instr_out(data, hw_offset, 0, + "PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, " + "%sinst flush\n", + desc1, + data[0] & (1 << 13) ? "" : "no ", + data[0] & (1 << 12) ? "" : "no ", + data[0] & (1 << 11) ? 
"" : "no "); + instr_out(data, hw_offset, 1, "destination address\n"); + instr_out(data, hw_offset, 2, "immediate dword low\n"); + instr_out(data, hw_offset, 3, "immediate dword high\n"); return len; case 0x7b00: @@ -1633,39 +1899,41 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures return len; } - for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { - if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) { + for (idx = 0; idx < Elements(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if ((data[0] & 0xffff0000) >> 16 == opcode_3d->opcode) { unsigned int i; len = 1; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); - if (opcodes_3d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { len = (data[0] & 0xff) + 2; - if (len < opcodes_3d[opcode].min_len || - len > opcodes_3d[opcode].max_len) + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { - fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + fprintf(out, "Bad count in %s\n", opcode_3d->name); } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } return len; } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_965 opcode = 0x%x\n", opcode); (*failures)++; return 1; } static int -decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures) { - unsigned int opcode; + unsigned int idx; + uint32_t opcode; struct { uint32_t opcode; @@ -1689,41 +1957,44 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure { 0x0f, 1, 1, "3DSTATE_MODES_2" }, { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, { 0x16, 1, 1, "3DSTATE_MODES_4" }, - }; + }, *opcode_3d; + + 
opcode = (data[0] & 0x1f000000) >> 24; - switch ((data[0] & 0x1f000000) >> 24) { + switch (opcode) { case 0x1f: return decode_3d_primitive(data, count, hw_offset, failures); case 0x1d: - return decode_3d_1d(data, count, hw_offset, failures, 1); + return decode_3d_1d(data, count, hw_offset, devid, failures); case 0x1c: return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { - if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + for (idx = 0; idx < Elements(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if ((data[0] & 0x1f000000) >> 24 == opcode_3d->opcode) { unsigned int len = 1, i; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); - if (opcodes_3d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { len = (data[0] & 0xff) + 2; - if (len < opcodes_3d[opcode].min_len || - len > opcodes_3d[opcode].max_len) + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { - fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + fprintf(out, "Bad count in %s\n", opcode_3d->name); } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } return len; } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_i830 opcode = 0x%x\n", opcode); (*failures)++; return 1; } @@ -1736,8 +2007,12 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure * \param hw_offset hardware address for the buffer */ int -intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid) +intel_decode(const uint32_t *data, int count, + uint32_t hw_offset, + uint32_t devid, + uint32_t ignore_end_of_batchbuffer) { + int ret; int index = 0; int failures = 0; @@ -1746,8 +2021,23 @@ intel_decode(const uint32_t *data, 
int count, uint32_t hw_offset, uint32_t devid while (index < count) { switch ((data[index] & 0xe0000000) >> 29) { case 0x0: - index += decode_mi(data + index, count - index, + ret = decode_mi(data + index, count - index, hw_offset + index * 4, &failures); + + /* If MI_BATCHBUFFER_END happened, then dump the rest of the + * output in case we some day want it in debugging, but don't + * decode it since it'll just confuse in the common case. + */ + if (ret == -1) { + if (ignore_end_of_batchbuffer) { + index++; + } else { + for (index = index + 1; index < count; index++) { + instr_out(data, hw_offset, index, "\n"); + } + } + } else + index += ret; break; case 0x2: index += decode_2d(data + index, count - index, @@ -1756,13 +2046,16 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid case 0x3: if (IS_965(devid)) { index += decode_3d_965(data + index, count - index, - hw_offset + index * 4, &failures); + hw_offset + index * 4, + devid, &failures); } else if (IS_9XX(devid)) { index += decode_3d(data + index, count - index, - hw_offset + index * 4, &failures); + hw_offset + index * 4, + devid, &failures); } else { index += decode_3d_i830(data + index, count - index, - hw_offset + index * 4, &failures); + hw_offset + index * 4, + devid, &failures); } break; default: diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h index 7683097b86..7e7c108c0c 100644 --- a/src/gallium/drivers/i965/intel_decode.h +++ b/src/gallium/drivers/i965/intel_decode.h @@ -25,5 +25,7 @@ * */ -int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid); +#include "pipe/p_compiler.h" + +int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, uint32_t ignore_end_of_batchbuffer); void intel_decode_context_reset(void); diff --git a/src/gallium/drivers/i965/intel_structs.h b/src/gallium/drivers/i965/intel_structs.h index 522e3bd92c..ec6eec8910 100644 --- 
a/src/gallium/drivers/i965/intel_structs.h +++ b/src/gallium/drivers/i965/intel_structs.h @@ -1,6 +1,8 @@ #ifndef INTEL_STRUCTS_H #define INTEL_STRUCTS_H +#include "brw_types.h" + struct br0 { GLuint length:8; GLuint pad0:3; diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 3efbd6a246..2a9d736015 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -668,34 +668,15 @@ identity_clear_depth_stencil(struct pipe_context *_pipe, static void identity_flush(struct pipe_context *_pipe, - unsigned flags, struct pipe_fence_handle **fence) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; pipe->flush(pipe, - flags, fence); } -static unsigned int -identity_is_resource_referenced(struct pipe_context *_pipe, - struct pipe_resource *_resource, - unsigned level, - int layer) -{ - struct identity_context *id_pipe = identity_context(_pipe); - struct identity_resource *id_resource = identity_resource(_resource); - struct pipe_context *pipe = id_pipe->pipe; - struct pipe_resource *resource = id_resource->resource; - - return pipe->is_resource_referenced(pipe, - resource, - level, - layer); -} - static struct pipe_sampler_view * identity_context_create_sampler_view(struct pipe_context *_pipe, struct pipe_resource *_resource, @@ -855,6 +836,19 @@ identity_context_transfer_inline_write(struct pipe_context *_context, } +static void identity_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct identity_context *id_context = identity_context(_context); + struct identity_resource *id_resource = identity_resource(_resource); + struct pipe_context *context = id_context->pipe; + struct pipe_resource *resource = id_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + struct pipe_context * 
identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -918,7 +912,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.clear_render_target = identity_clear_render_target; id_pipe->base.clear_depth_stencil = identity_clear_depth_stencil; id_pipe->base.flush = identity_flush; - id_pipe->base.is_resource_referenced = identity_is_resource_referenced; id_pipe->base.create_surface = identity_context_create_surface; id_pipe->base.surface_destroy = identity_context_surface_destroy; id_pipe->base.create_sampler_view = identity_context_create_sampler_view; @@ -929,6 +922,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.transfer_unmap = identity_context_transfer_unmap; id_pipe->base.transfer_flush_region = identity_context_transfer_flush_region; id_pipe->base.transfer_inline_write = identity_context_transfer_inline_write; + id_pipe->base.redefine_user_buffer = identity_redefine_user_buffer; id_pipe->pipe = pipe; diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index 644481bb74..9bf7fd4c0e 100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -103,8 +103,7 @@ identity_screen_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { struct identity_screen *id_screen = identity_screen(_screen); struct pipe_screen *screen = id_screen->screen; @@ -113,8 +112,7 @@ identity_screen_is_format_supported(struct pipe_screen *_screen, format, target, sample_count, - tex_usage, - geom_flags); + tex_usage); } static struct pipe_context * @@ -242,30 +240,28 @@ identity_screen_fence_reference(struct pipe_screen *_screen, fence); } -static int +static boolean identity_screen_fence_signalled(struct pipe_screen *_screen, - struct 
pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct identity_screen *id_screen = identity_screen(_screen); struct pipe_screen *screen = id_screen->screen; return screen->fence_signalled(screen, - fence, - flags); + fence); } -static int +static boolean identity_screen_fence_finish(struct pipe_screen *_screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct identity_screen *id_screen = identity_screen(_screen); struct pipe_screen *screen = id_screen->screen; return screen->fence_finish(screen, fence, - flags); + timeout); } struct pipe_screen * diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index e9374cc6ef..1fc7746a83 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -12,7 +12,7 @@ Requirements See /proc/cpuinfo to know what your CPU supports. - - LLVM 2.6 (or later) + - LLVM. Version 2.8 recommended. 2.6 or later required. For Linux, on a recent Debian based distribution do: @@ -30,21 +30,8 @@ Requirements debug=no. This is necessary as LLVM builds as static library so the chosen MS CRT must match. - The version of LLVM from SVN ("2.7svn") from mid-March 2010 is pretty - stable and has some features not in version 2.6. - - scons (optional) - - udis86, http://udis86.sourceforge.net/ (optional). My personal repository - supports more opcodes which haven't been merged upstream yet: - - git clone git://anongit.freedesktop.org/~jrfonseca/udis86 - cd udis86 - ./autogen.sh - ./configure --with-pic - make - sudo make install - Building ======== @@ -94,13 +81,7 @@ that no tail call optimizations are done by gcc. To better profile JIT code you'll need to build LLVM with oprofile integration. 
- source_dir=$PWD/llvm-2.6 - build_dir=$source_dir/build/profile - install_dir=$source_dir-profile - - mkdir -p "$build_dir" - cd "$build_dir" && \ - $source_dir/configure \ + ./configure \ --prefix=$install_dir \ --enable-optimized \ --disable-profiling \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 26b258b956..c10a8cbc12 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -96,10 +96,15 @@ if env['platform'] != 'embedded': tests.append('round') for test in tests: + testname = 'lp_test_' + test target = env.Program( - target = 'lp_test_' + test, - source = ['lp_test_' + test + '.c', 'lp_test_main.c'], + target = testname, + source = [testname + '.c', 'lp_test_main.c'], ) env.InstallProgram(target) + + # http://www.scons.org/wiki/UnitTests + alias = env.Alias(testname, [target], target[0].abspath) + AlwaysBuild(alias) Export('llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 5c9392504f..06206a24d8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -34,10 +34,12 @@ #ifndef LP_BLD_ALPHA_H #define LP_BLD_ALPHA_H +#include "pipe/p_compiler.h" #include "gallivm/lp_bld.h" struct pipe_alpha_state; +struct gallivm_state; struct lp_type; struct lp_build_mask_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 038b136a28..e01fc46ec1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -36,10 +36,14 @@ #define LP_BLD_DEPTH_H +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + #include "gallivm/lp_bld.h" struct pipe_depth_state; +struct gallivm_state; struct util_format_description; struct lp_type; struct lp_build_mask_context; diff --git a/src/gallium/drivers/llvmpipe/lp_context.c 
b/src/gallium/drivers/llvmpipe/lp_context.c index 2de20d6e9a..8a5655d499 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -125,6 +125,10 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) } } + for (i = 0; i < llvmpipe->num_vertex_buffers; i++) { + pipe_resource_reference(&llvmpipe->vertex_buffer[i].buffer, NULL); + } + gallivm_destroy(llvmpipe->gallivm); align_free( llvmpipe ); @@ -132,10 +136,9 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) static void do_flush( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence) { - llvmpipe_flush(pipe, flags, fence, __FUNCTION__); + llvmpipe_flush(pipe, fence, __FUNCTION__); } diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index 3a55e76bc3..a21a3c7448 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -47,6 +47,9 @@ lp_fence_create(unsigned rank) static int fence_id; struct lp_fence *fence = CALLOC_STRUCT(lp_fence); + if (!fence) + return NULL; + pipe_reference_init(&fence->reference, 1); pipe_mutex_init(fence->mutex); diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 85e3cdec82..42430550ea 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -40,12 +40,10 @@ /** - * \param flags bitmask of PIPE_FLUSH_x flags * \param fence if non-null, returns pointer to a fence which can be waited on */ void llvmpipe_flush( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence, const char *reason) { @@ -54,7 +52,7 @@ llvmpipe_flush( struct pipe_context *pipe, draw_flush(llvmpipe->draw); /* ask the setup module to flush */ - lp_setup_flush(llvmpipe->setup, flags, fence, reason); + lp_setup_flush(llvmpipe->setup, fence, reason); if (llvmpipe_variant_count > 1000) { @@ -65,23 +63,21 @@ llvmpipe_flush( struct pipe_context 
*pipe, /* Enable to dump BMPs of the color/depth buffers each frame */ if (0) { - if (flags & PIPE_FLUSH_FRAME) { - static unsigned frame_no = 1; - char filename[256]; - unsigned i; - - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { - util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); - debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]); - } - - if (0) { - util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); - debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf); - } - - ++frame_no; + static unsigned frame_no = 1; + char filename[256]; + unsigned i; + + for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[i]); + } + + if (0) { + util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf); } + + ++frame_no; } } @@ -90,9 +86,9 @@ llvmpipe_finish( struct pipe_context *pipe, const char *reason ) { struct pipe_fence_handle *fence = NULL; - llvmpipe_flush(pipe, 0, &fence, reason); + llvmpipe_flush(pipe, &fence, reason); if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_finish(pipe->screen, fence, PIPE_TIMEOUT_INFINITE); pipe->screen->fence_reference(pipe->screen, &fence, NULL); } } @@ -110,7 +106,6 @@ llvmpipe_flush_resource(struct pipe_context *pipe, struct pipe_resource *resource, unsigned level, int layer, - unsigned flush_flags, boolean read_only, boolean cpu_access, boolean do_not_block, @@ -118,10 +113,10 @@ llvmpipe_flush_resource(struct pipe_context *pipe, { unsigned referenced; - referenced = pipe->is_resource_referenced(pipe, resource, level, layer); + referenced = llvmpipe_is_resource_referenced(pipe, resource, level, layer); - if ((referenced & PIPE_REFERENCED_FOR_WRITE) || - ((referenced & 
PIPE_REFERENCED_FOR_READ) && !read_only)) { + if ((referenced & LP_REFERENCED_FOR_WRITE) || + ((referenced & LP_REFERENCED_FOR_READ) && !read_only)) { if (cpu_access) { /* @@ -136,7 +131,7 @@ llvmpipe_flush_resource(struct pipe_context *pipe, * Just flush. */ - llvmpipe_flush(pipe, flush_flags, NULL, reason); + llvmpipe_flush(pipe, NULL, reason); } } diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index 579d24c68a..efff94c8c0 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -36,7 +36,6 @@ struct pipe_resource; void llvmpipe_flush(struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence, const char *reason); @@ -49,7 +48,6 @@ llvmpipe_flush_resource(struct pipe_context *pipe, struct pipe_resource *resource, unsigned level, int layer, - unsigned flush_flags, boolean read_only, boolean cpu_access, boolean do_not_block, diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index a775990f92..482a902dd2 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -36,9 +36,7 @@ #include "util/u_memory.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_debug.h" -#include "gallivm/lp_bld_intr.h" #include "lp_context.h" -#include "lp_screen.h" #include "lp_jit.h" diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h index b23a100b87..455adf7d6f 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.h +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -33,6 +33,7 @@ #ifndef LP_PERF_H #define LP_PERF_H +#include "pipe/p_compiler.h" /** * Various counters diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 84c66dd36e..1e2401fa84 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -69,7 +69,7 @@ llvmpipe_destroy_query(struct pipe_context *pipe, 
struct pipe_query *q) */ if (pq->fence) { if (!lp_fence_issued(pq->fence)) - llvmpipe_flush(pipe, 0, NULL, __FUNCTION__); + llvmpipe_flush(pipe, NULL, __FUNCTION__); if (!lp_fence_signalled(pq->fence)) lp_fence_wait(pq->fence); @@ -99,7 +99,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe, if (!lp_fence_signalled(pq->fence)) { if (!lp_fence_issued(pq->fence)) - llvmpipe_flush(pipe, 0, NULL, __FUNCTION__); + llvmpipe_flush(pipe, NULL, __FUNCTION__); if (!wait) return FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 978d17c575..5d0f5f8b7b 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -74,6 +74,7 @@ lp_scene_create( struct pipe_context *pipe ) void lp_scene_destroy(struct lp_scene *scene) { + lp_fence_reference(&scene->fence, NULL); pipe_mutex_destroy(scene->mutex); assert(scene->data.head->next == NULL); FREE(scene->data.head); diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/src/gallium/drivers/llvmpipe/lp_scene_queue.h index fd7c65a2c8..dd9ab593b4 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene_queue.h +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.h @@ -29,6 +29,8 @@ #ifndef LP_SCENE_QUEUE #define LP_SCENE_QUEUE +#include "pipe/p_compiler.h" + struct lp_scene_queue; struct lp_scene; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 9459a3cd11..521a52ad3a 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -151,7 +151,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: return 1; case PIPE_CAP_INDEP_BLEND_FUNC: - return 0; + return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 1; @@ -164,6 +164,9 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_DEPTH_CLAMP: return 
0; + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + return 1; default: return 0; } @@ -222,8 +225,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned bind, - unsigned geom_flags ) + unsigned bind) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct sw_winsys *winsys = screen->winsys; @@ -276,6 +278,11 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, return util_format_s3tc_enabled; } + /* u_format doesn't support RGTC yet */ + if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + return FALSE; + } + /* * Everything else should be supported by u_format. */ @@ -341,10 +348,9 @@ llvmpipe_fence_reference(struct pipe_screen *screen, /** * Has the fence been executed/finished? */ -static int +static boolean llvmpipe_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) + struct pipe_fence_handle *fence) { struct lp_fence *f = (struct lp_fence *) fence; return lp_fence_signalled(f); @@ -354,15 +360,15 @@ llvmpipe_fence_signalled(struct pipe_screen *screen, /** * Wait for the fence to finish. 
*/ -static int +static boolean llvmpipe_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence_handle, - unsigned flag) + uint64_t timeout) { struct lp_fence *f = (struct lp_fence *) fence_handle; lp_fence_wait(f); - return 0; + return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index db04c84efb..3813e0ed97 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -333,12 +333,8 @@ fail: } -/** - * \param flags bitmask of PIPE_FLUSH_x flags - */ void lp_setup_flush( struct lp_setup_context *setup, - unsigned flags, struct pipe_fence_handle **fence, const char *reason) { @@ -469,7 +465,7 @@ lp_setup_clear( struct lp_setup_context *setup, unsigned flags ) { if (!lp_setup_try_clear( setup, color, depth, stencil, flags )) { - lp_setup_flush(setup, 0, NULL, __FUNCTION__); + lp_setup_flush(setup, NULL, __FUNCTION__); if (!lp_setup_try_clear( setup, color, depth, stencil, flags )) assert(0); @@ -753,20 +749,20 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup, /* check the render targets */ for (i = 0; i < setup->fb.nr_cbufs; i++) { if (setup->fb.cbufs[i]->texture == texture) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE; } if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) { - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE; } /* check textures referenced by the scene */ for (i = 0; i < Elements(setup->scenes); i++) { if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) { - return PIPE_REFERENCED_FOR_READ; + return LP_REFERENCED_FOR_READ; } } - return PIPE_UNREFERENCED; + return LP_UNREFERENCED; } @@ -996,6 +992,8 @@ lp_setup_destroy( struct lp_setup_context *setup ) lp_scene_destroy(scene); } + lp_fence_reference(&setup->last_fence, NULL); + FREE( setup ); } @@ 
-1062,6 +1060,8 @@ lp_setup_begin_query(struct lp_setup_context *setup, { /* init the query to its beginning state */ assert(setup->active_query == NULL); + + set_scene_state(setup, SETUP_ACTIVE, "begin_query"); if (setup->scene) { if (!lp_scene_bin_everywhere(setup->scene, @@ -1091,6 +1091,8 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) { union lp_rast_cmd_arg dummy = { 0 }; + set_scene_state(setup, SETUP_ACTIVE, "end_query"); + assert(setup->active_query == pq); setup->active_query = NULL; @@ -1108,7 +1110,7 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) if (!lp_scene_bin_everywhere(setup->scene, LP_RAST_OP_END_QUERY, dummy)) { - lp_setup_flush(setup, 0, NULL, __FUNCTION__); + lp_setup_flush(setup, NULL, __FUNCTION__); } } else { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 0d6e161a21..8655259d27 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -64,7 +64,6 @@ lp_setup_clear(struct lp_setup_context *setup, void lp_setup_flush( struct lp_setup_context *setup, - unsigned flags, struct pipe_fence_handle **fence, const char *reason); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2c4943a69f..6243a96f45 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -90,7 +90,6 @@ #include "lp_context.h" #include "lp_debug.h" #include "lp_perf.h" -#include "lp_screen.h" #include "lp_setup.h" #include "lp_state.h" #include "lp_tex_sample.h" @@ -335,7 +334,8 @@ generate_fs(struct gallivm_state *gallivm, /* Build the actual shader */ lp_build_tgsi_soa(gallivm, tokens, type, &mask, - consts_ptr, interp->pos, interp->inputs, + consts_ptr, NULL, /* sys values array */ + interp->pos, interp->inputs, outputs, sampler, &shader->info.base); /* Alpha test */ @@ -546,6 +546,7 @@ generate_fragment(struct 
llvmpipe_context *lp, unsigned i; unsigned chan; unsigned cbuf; + boolean cbuf0_write_all; /* Adjust color input interpolation according to flatshade state: */ @@ -559,6 +560,15 @@ generate_fragment(struct llvmpipe_context *lp, } } + /* check if writes to cbuf[0] are to be copied to all cbufs */ + cbuf0_write_all = FALSE; + for (i = 0;i < shader->info.base.num_properties; i++) { + if (shader->info.base.properties[i].name == + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { + cbuf0_write_all = TRUE; + break; + } + } /* TODO: actually pick these based on the fs and color buffer * characteristics. */ @@ -697,9 +707,10 @@ generate_fragment(struct llvmpipe_context *lp, mask_input, counter); - for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) - for(chan = 0; chan < NUM_CHANNELS; ++chan) - fs_out_color[cbuf][chan][i] = out_color[cbuf][chan]; + for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) + for (chan = 0; chan < NUM_CHANNELS; ++chan) + fs_out_color[cbuf][chan][i] = + out_color[cbuf * !cbuf0_write_all][chan]; } sampler->destroy(sampler); diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index ad751b9ef4..9f1ec146e9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -651,7 +651,7 @@ generate_setup_variant(struct gallivm_state *gallivm, LLVMTypeRef arg_types[7]; LLVMBasicBlockRef block; LLVMBuilderRef builder = gallivm->builder; - int64_t t0, t1; + int64_t t0 = 0, t1; if (0) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index fb29423dd3..be86f66de9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -33,6 +33,8 @@ #include "lp_state.h" #include "draw/draw_context.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" static void * @@ -80,8 +82,9 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe, assert(count <= 
PIPE_MAX_ATTRIBS); - memcpy(llvmpipe->vertex_buffer, buffers, count * sizeof(buffers[0])); - llvmpipe->num_vertex_buffers = count; + util_copy_vertex_buffers(llvmpipe->vertex_buffer, + &llvmpipe->num_vertex_buffers, + buffers, count); llvmpipe->dirty |= LP_NEW_VERTEX; @@ -112,4 +115,6 @@ llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer; + + llvmpipe->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index e7e46a628a..f49638acf0 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -69,7 +69,6 @@ lp_resource_copy(struct pipe_context *pipe, llvmpipe_flush_resource(pipe, dst, dst_level, dstz, - 0, /* flush_flags */ FALSE, /* read_only */ TRUE, /* cpu_access */ FALSE, /* do_not_block */ @@ -77,7 +76,6 @@ lp_resource_copy(struct pipe_context *pipe, llvmpipe_flush_resource(pipe, src, src_level, src_box->z, - 0, /* flush_flags */ TRUE, /* read_only */ TRUE, /* cpu_access */ FALSE, /* do_not_block */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 149ee6f125..d229c62031 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -35,24 +35,13 @@ #include "util/u_cpu_detect.h" +#include "util/u_math.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_init.h" #include "lp_test.h" -#ifdef PIPE_CC_MSVC -static INLINE double -round(double x) -{ - if (x >= 0.0) - return floor(x + 0.5); - else - return ceil(x - 0.5); -} -#endif - - void dump_type(FILE *fp, struct lp_type type) diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 9753da5e57..fa4ce5bf2a 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ 
b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -575,7 +575,6 @@ llvmpipe_get_transfer(struct pipe_context *pipe, if (!llvmpipe_flush_resource(pipe, resource, level, box->depth > 1 ? -1 : box->z, - 0, /* flush_flags */ read_only, TRUE, /* cpu_access */ do_not_block, @@ -695,7 +694,7 @@ llvmpipe_transfer_unmap(struct pipe_context *pipe, transfer->box.z); } -static unsigned int +unsigned int llvmpipe_is_resource_referenced( struct pipe_context *pipe, struct pipe_resource *presource, unsigned level, int layer) @@ -703,7 +702,7 @@ llvmpipe_is_resource_referenced( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); if (presource->target == PIPE_BUFFER) - return PIPE_UNREFERENCED; + return LP_UNREFERENCED; return lp_setup_is_resource_referenced(llvmpipe->setup, presource); } @@ -1401,7 +1400,6 @@ llvmpipe_init_context_resource_funcs(struct pipe_context *pipe) pipe->transfer_destroy = llvmpipe_transfer_destroy; pipe->transfer_map = llvmpipe_transfer_map; pipe->transfer_unmap = llvmpipe_transfer_unmap; - pipe->is_resource_referenced = llvmpipe_is_resource_referenced; pipe->transfer_flush_region = u_default_transfer_flush_region; pipe->transfer_inline_write = u_default_transfer_inline_write; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index b789c0f409..b4a0dfd1c3 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -243,4 +243,14 @@ llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen); extern void llvmpipe_init_context_texture_funcs(struct pipe_context *pipe); + +#define LP_UNREFERENCED 0 +#define LP_REFERENCED_FOR_READ (1 << 0) +#define LP_REFERENCED_FOR_WRITE (1 << 1) + +unsigned int +llvmpipe_is_resource_referenced( struct pipe_context *pipe, + struct pipe_resource *presource, + unsigned level, int layer); + #endif /* LP_TEXTURE_H */ diff --git a/src/gallium/drivers/noop/noop_pipe.c 
b/src/gallium/drivers/noop/noop_pipe.c index c9c463f470..496b7f5ec6 100644 --- a/src/gallium/drivers/noop/noop_pipe.c +++ b/src/gallium/drivers/noop/noop_pipe.c @@ -30,10 +30,16 @@ #include <util/u_inlines.h> #include <util/u_format.h> #include "noop_public.h" -#include "state_tracker/sw_winsys.h" + +DEBUG_GET_ONCE_BOOL_OPTION(noop, "GALLIUM_NOOP", FALSE) void noop_init_state_functions(struct pipe_context *ctx); +struct noop_pipe_screen { + struct pipe_screen pscreen; + struct pipe_screen *oscreen; +}; + /* * query */ @@ -81,13 +87,6 @@ struct noop_resource { struct sw_displaytarget *dt; }; -static unsigned noop_is_resource_referenced(struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, int layer) -{ - return PIPE_UNREFERENCED; -} - static struct pipe_resource *noop_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) { @@ -108,52 +107,29 @@ static struct pipe_resource *noop_resource_create(struct pipe_screen *screen, FREE(nresource); return NULL; } -#if 0 - if (nresource->base.bind & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - unsigned stride; - - nresource->dt = winsys->displaytarget_create(winsys, nresource->base.bind, - nresource->base.format, - nresource->base.width0, - nresource->base.height0, - 16, &stride); - } -#endif return &nresource->base; } -static struct pipe_resource *noop_resource_from_handle(struct pipe_screen * screen, +static struct pipe_resource *noop_resource_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, - struct winsys_handle *whandle) + struct winsys_handle *handle) { - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - struct noop_resource *nresource; - struct sw_displaytarget *dt; - unsigned stride; + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + struct pipe_resource 
*result; + struct pipe_resource *noop_resource; - dt = winsys->displaytarget_from_handle(winsys, templ, whandle, &stride); - if (dt == NULL) { - return NULL; - } - nresource = (struct noop_resource *)noop_resource_create(screen, templ); - nresource->dt = dt; - return &nresource->base; + result = oscreen->resource_from_handle(oscreen, templ, handle); + noop_resource = noop_resource_create(screen, result); + pipe_resource_reference(&result, NULL); + return noop_resource; } static boolean noop_resource_get_handle(struct pipe_screen *screen, struct pipe_resource *resource, struct winsys_handle *handle) { - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - struct noop_resource *nresource = (struct noop_resource *)resource; - - if (nresource->dt == NULL) - return FALSE; - - return winsys->displaytarget_get_handle(winsys, nresource->dt, handle); + return FALSE; } static void noop_resource_destroy(struct pipe_screen *screen, @@ -161,11 +137,6 @@ static void noop_resource_destroy(struct pipe_screen *screen, { struct noop_resource *nresource = (struct noop_resource *)resource; - if (nresource->dt) { - /* display target */ - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - winsys->displaytarget_destroy(winsys, nresource->dt); - } free(nresource->data); FREE(resource); } @@ -289,7 +260,7 @@ static void noop_resource_copy_region(struct pipe_context *ctx, /* * context */ -static void noop_flush(struct pipe_context *ctx, unsigned flags, +static void noop_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence) { } @@ -325,7 +296,6 @@ static struct pipe_context *noop_create_context(struct pipe_screen *screen, void ctx->transfer_unmap = noop_transfer_unmap; ctx->transfer_destroy = noop_transfer_destroy; ctx->transfer_inline_write = noop_transfer_inline_write; - ctx->is_resource_referenced = noop_is_resource_referenced; noop_init_state_functions(ctx); return ctx; @@ -475,27 +445,37 @@ static boolean noop_is_format_supported(struct pipe_screen* 
screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned usage, - unsigned geom_flags) + unsigned usage) { return true; } static void noop_destroy_screen(struct pipe_screen *screen) { + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + + oscreen->destroy(oscreen); FREE(screen); } -struct pipe_screen *noop_screen_create(struct sw_winsys *winsys) +struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen) { + struct noop_pipe_screen *noop_screen; struct pipe_screen *screen; - screen = CALLOC_STRUCT(pipe_screen); - if (screen == NULL) { + if (!debug_get_option_noop()) { + return oscreen; + } + + noop_screen = CALLOC_STRUCT(noop_pipe_screen); + if (noop_screen == NULL) { return NULL; } + noop_screen->oscreen = oscreen; + screen = &noop_screen->pscreen; - screen->winsys = (struct pipe_winsys*)winsys; + screen->winsys = oscreen->winsys; screen->destroy = noop_destroy_screen; screen->get_name = noop_get_name; screen->get_vendor = noop_get_vendor; diff --git a/src/gallium/drivers/noop/noop_public.h b/src/gallium/drivers/noop/noop_public.h index 8ce82bec69..180ea597fa 100644 --- a/src/gallium/drivers/noop/noop_public.h +++ b/src/gallium/drivers/noop/noop_public.h @@ -23,8 +23,7 @@ #ifndef NOOP_PUBLIC_H #define NOOP_PUBLIC_H -struct sw_winsys; - -struct pipe_screen *noop_screen_create(struct sw_winsys *winsys); +struct pipe_screen; +struct pipe_screen *noop_screen_create(struct pipe_screen *screen); #endif diff --git a/src/gallium/drivers/noop/noop_state.c b/src/gallium/drivers/noop/noop_state.c index ad324774c0..00a4c1eb01 100644 --- a/src/gallium/drivers/noop/noop_state.c +++ b/src/gallium/drivers/noop/noop_state.c @@ -28,6 +28,7 @@ #include <pipe/p_screen.h> #include <util/u_memory.h> #include <util/u_inlines.h> +#include "util/u_transfer.h" static void noop_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { @@ -287,4 
+288,5 @@ void noop_init_state_functions(struct pipe_context *ctx) ctx->sampler_view_destroy = noop_sampler_view_destroy; ctx->surface_destroy = noop_surface_destroy; ctx->draw_vbo = noop_draw_vbo; + ctx->redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index db591b756c..3210d1ff77 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -4,8 +4,12 @@ include $(TOP)/configs/current LIBNAME = nouveau LIBRARY_INCLUDES = \ + $(LIBDRM_CFLAGS) \ -I$(TOP)/src/gallium/drivers/nouveau/include -C_SOURCES = nouveau_screen.c +C_SOURCES = nouveau_screen.c \ + nouveau_fence.c \ + nouveau_mm.c \ + nouveau_buffer.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c new file mode 100644 index 0000000000..01d3aa46d0 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -0,0 +1,487 @@ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "nouveau_screen.h" +#include "nouveau_context.h" +#include "nouveau_winsys.h" +#include "nouveau_fence.h" +#include "nouveau_buffer.h" +#include "nouveau_mm.h" + +struct nouveau_transfer { + struct pipe_transfer base; +}; + +static INLINE struct nouveau_transfer * +nouveau_transfer(struct pipe_transfer *transfer) +{ + return (struct nouveau_transfer *)transfer; +} + +static INLINE boolean +nouveau_buffer_allocate(struct nouveau_screen *screen, + struct nv04_resource *buf, unsigned domain) +{ + if (domain == NOUVEAU_BO_VRAM) { + buf->mm = nouveau_mm_allocate(screen->mm_VRAM, buf->base.width0, + &buf->bo, &buf->offset); + if (!buf->bo) + return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART); + } else + if (domain == NOUVEAU_BO_GART) { + buf->mm = nouveau_mm_allocate(screen->mm_GART, buf->base.width0, + &buf->bo, &buf->offset); + if (!buf->bo) + return FALSE; + } + if 
(domain != NOUVEAU_BO_GART) { + if (!buf->data) { + buf->data = MALLOC(buf->base.width0); + if (!buf->data) + return FALSE; + } + } + buf->domain = domain; + return TRUE; +} + +static INLINE void +release_allocation(struct nouveau_mm_allocation **mm, + struct nouveau_fence *fence) +{ + nouveau_fence_work(fence, nouveau_mm_free_work, *mm); + (*mm) = NULL; +} + +INLINE void +nouveau_buffer_release_gpu_storage(struct nv04_resource *buf) +{ + nouveau_bo_ref(NULL, &buf->bo); + + if (buf->mm) + release_allocation(&buf->mm, buf->fence); + + buf->domain = 0; +} + +static INLINE boolean +nouveau_buffer_reallocate(struct nouveau_screen *screen, + struct nv04_resource *buf, unsigned domain) +{ + nouveau_buffer_release_gpu_storage(buf); + + return nouveau_buffer_allocate(screen, buf, domain); +} + +static void +nouveau_buffer_destroy(struct pipe_screen *pscreen, + struct pipe_resource *presource) +{ + struct nv04_resource *res = nv04_resource(presource); + + nouveau_buffer_release_gpu_storage(res); + + if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY)) + FREE(res->data); + + FREE(res); +} + +/* Maybe just migrate to GART right away if we actually need to do this. 
*/ +boolean +nouveau_buffer_download(struct nouveau_context *nv, struct nv04_resource *buf, + unsigned start, unsigned size) +{ + struct nouveau_mm_allocation *mm; + struct nouveau_bo *bounce = NULL; + uint32_t offset; + + assert(buf->domain == NOUVEAU_BO_VRAM); + + mm = nouveau_mm_allocate(nv->screen->mm_GART, size, &bounce, &offset); + if (!bounce) + return FALSE; + + nv->copy_data(nv, bounce, offset, NOUVEAU_BO_GART, + buf->bo, buf->offset + start, NOUVEAU_BO_VRAM, size); + + if (nouveau_bo_map_range(bounce, offset, size, NOUVEAU_BO_RD)) + return FALSE; + memcpy(buf->data + start, bounce->map, size); + nouveau_bo_unmap(bounce); + + buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + + nouveau_bo_ref(NULL, &bounce); + if (mm) + nouveau_mm_free(mm); + return TRUE; +} + +static boolean +nouveau_buffer_upload(struct nouveau_context *nv, struct nv04_resource *buf, + unsigned start, unsigned size) +{ + struct nouveau_mm_allocation *mm; + struct nouveau_bo *bounce = NULL; + uint32_t offset; + + if (size <= 192) { + nv->push_data(nv, buf->bo, buf->offset + start, buf->domain, + size, buf->data + start); + return TRUE; + } + + mm = nouveau_mm_allocate(nv->screen->mm_GART, size, &bounce, &offset); + if (!bounce) + return FALSE; + + nouveau_bo_map_range(bounce, offset, size, + NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); + memcpy(bounce->map, buf->data + start, size); + nouveau_bo_unmap(bounce); + + nv->copy_data(nv, buf->bo, buf->offset + start, NOUVEAU_BO_VRAM, + bounce, offset, NOUVEAU_BO_GART, size); + + nouveau_bo_ref(NULL, &bounce); + if (mm) + release_allocation(&mm, nv->screen->fence.current); + + if (start == 0 && size == buf->base.width0) + buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + return TRUE; +} + +static struct pipe_transfer * +nouveau_buffer_transfer_get(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, unsigned usage, + const struct pipe_box *box) +{ + struct nv04_resource *buf = nv04_resource(resource); + struct 
nouveau_context *nv = nouveau_context(pipe); + struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer); + if (!xfr) + return NULL; + + xfr->base.resource = resource; + xfr->base.box.x = box->x; + xfr->base.box.width = box->width; + xfr->base.usage = usage; + + if (buf->domain == NOUVEAU_BO_VRAM) { + if (usage & PIPE_TRANSFER_READ) { + if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) + nouveau_buffer_download(nv, buf, 0, buf->base.width0); + } + } + + return &xfr->base; +} + +static void +nouveau_buffer_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct nv04_resource *buf = nv04_resource(transfer->resource); + struct nouveau_transfer *xfr = nouveau_transfer(transfer); + struct nouveau_context *nv = nouveau_context(pipe); + + if (xfr->base.usage & PIPE_TRANSFER_WRITE) { + /* writing is worse */ + nouveau_buffer_adjust_score(nv, buf, -5000); + + if (buf->domain == NOUVEAU_BO_VRAM) { + nouveau_buffer_upload(nv, buf, transfer->box.x, transfer->box.width); + } + + if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER))) + nouveau_context(pipe)->vbo_dirty = TRUE; + } + + FREE(xfr); +} + +static INLINE boolean +nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw) +{ + if (rw == PIPE_TRANSFER_READ) { + if (!buf->fence_wr) + return TRUE; + if (!nouveau_fence_wait(buf->fence_wr)) + return FALSE; + } else { + if (!buf->fence) + return TRUE; + if (!nouveau_fence_wait(buf->fence)) + return FALSE; + + nouveau_fence_ref(NULL, &buf->fence); + } + nouveau_fence_ref(NULL, &buf->fence_wr); + + return TRUE; +} + +static INLINE boolean +nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw) +{ + if (rw == PIPE_TRANSFER_READ) + return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)); + else + return (buf->fence && !nouveau_fence_signalled(buf->fence)); +} + +static void * +nouveau_buffer_transfer_map(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct 
nouveau_transfer *xfr = nouveau_transfer(transfer); + struct nv04_resource *buf = nv04_resource(transfer->resource); + struct nouveau_bo *bo = buf->bo; + uint8_t *map; + int ret; + uint32_t offset = xfr->base.box.x; + uint32_t flags; + + nouveau_buffer_adjust_score(nouveau_context(pipe), buf, -250); + + if (buf->domain != NOUVEAU_BO_GART) + return buf->data + offset; + + if (buf->mm) + flags = NOUVEAU_BO_NOSYNC | NOUVEAU_BO_RDWR; + else + flags = nouveau_screen_transfer_flags(xfr->base.usage); + + offset += buf->offset; + + ret = nouveau_bo_map_range(buf->bo, offset, xfr->base.box.width, flags); + if (ret) + return NULL; + map = bo->map; + + /* Unmap right now. Since multiple buffers can share a single nouveau_bo, + * not doing so might make future maps fail or trigger "reloc while mapped" + * errors. For now, mappings to userspace are guaranteed to be persistent. + */ + nouveau_bo_unmap(bo); + + if (buf->mm) { + if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) { + if (nouveau_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE)) + return NULL; + } else + if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + nouveau_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE); + } + } + return map; +} + + + +static void +nouveau_buffer_transfer_flush_region(struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ + struct nv04_resource *res = nv04_resource(transfer->resource); + struct nouveau_bo *bo = res->bo; + unsigned offset = res->offset + transfer->box.x + box->x; + + /* not using non-snoop system memory yet, no need for cflush */ + if (1) + return; + + /* XXX: maybe need to upload for VRAM buffers here */ + + nouveau_screen_bo_map_flush_range(pipe->screen, bo, offset, box->width); +} + +static void +nouveau_buffer_transfer_unmap(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + /* we've called nouveau_bo_unmap right after map */ +} + +const struct u_resource_vtbl nouveau_buffer_vtbl = +{ + 
u_default_resource_get_handle, /* get_handle */ + nouveau_buffer_destroy, /* resource_destroy */ + nouveau_buffer_transfer_get, /* get_transfer */ + nouveau_buffer_transfer_destroy, /* transfer_destroy */ + nouveau_buffer_transfer_map, /* transfer_map */ + nouveau_buffer_transfer_flush_region, /* transfer_flush_region */ + nouveau_buffer_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + +struct pipe_resource * +nouveau_buffer_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) +{ + struct nouveau_screen *screen = nouveau_screen(pscreen); + struct nv04_resource *buffer; + boolean ret; + + buffer = CALLOC_STRUCT(nv04_resource); + if (!buffer) + return NULL; + + buffer->base = *templ; + buffer->vtbl = &nouveau_buffer_vtbl; + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.screen = pscreen; + + if ((buffer->base.bind & screen->sysmem_bindings) == screen->sysmem_bindings) + ret = nouveau_buffer_allocate(screen, buffer, 0); + else + ret = nouveau_buffer_allocate(screen, buffer, NOUVEAU_BO_GART); + + if (ret == FALSE) + goto fail; + + return &buffer->base; + +fail: + FREE(buffer); + return NULL; +} + + +struct pipe_resource * +nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr, + unsigned bytes, unsigned bind) +{ + struct nv04_resource *buffer; + + buffer = CALLOC_STRUCT(nv04_resource); + if (!buffer) + return NULL; + + pipe_reference_init(&buffer->base.reference, 1); + buffer->vtbl = &nouveau_buffer_vtbl; + buffer->base.screen = pscreen; + buffer->base.format = PIPE_FORMAT_R8_UNORM; + buffer->base.usage = PIPE_USAGE_IMMUTABLE; + buffer->base.bind = bind; + buffer->base.width0 = bytes; + buffer->base.height0 = 1; + buffer->base.depth0 = 1; + + buffer->data = ptr; + buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY; + + return &buffer->base; +} + +/* Like download, but for GART buffers. Merge ? 
*/ +static INLINE boolean +nouveau_buffer_data_fetch(struct nv04_resource *buf, struct nouveau_bo *bo, + unsigned offset, unsigned size) +{ + if (!buf->data) { + buf->data = MALLOC(size); + if (!buf->data) + return FALSE; + } + if (nouveau_bo_map_range(bo, offset, size, NOUVEAU_BO_RD)) + return FALSE; + memcpy(buf->data, bo->map, size); + nouveau_bo_unmap(bo); + + return TRUE; +} + +/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */ +boolean +nouveau_buffer_migrate(struct nouveau_context *nv, + struct nv04_resource *buf, const unsigned new_domain) +{ + struct nouveau_screen *screen = nv->screen; + struct nouveau_bo *bo; + const unsigned old_domain = buf->domain; + unsigned size = buf->base.width0; + unsigned offset; + int ret; + + assert(new_domain != old_domain); + + if (new_domain == NOUVEAU_BO_GART && old_domain == 0) { + if (!nouveau_buffer_allocate(screen, buf, new_domain)) + return FALSE; + ret = nouveau_bo_map_range(buf->bo, buf->offset, size, NOUVEAU_BO_WR | + NOUVEAU_BO_NOSYNC); + if (ret) + return ret; + memcpy(buf->bo->map, buf->data, size); + nouveau_bo_unmap(buf->bo); + FREE(buf->data); + } else + if (old_domain != 0 && new_domain != 0) { + struct nouveau_mm_allocation *mm = buf->mm; + + if (new_domain == NOUVEAU_BO_VRAM) { + /* keep a system memory copy of our data in case we hit a fallback */ + if (!nouveau_buffer_data_fetch(buf, buf->bo, buf->offset, size)) + return FALSE; + debug_printf("migrating %u KiB to VRAM\n", size / 1024); + } + + offset = buf->offset; + bo = buf->bo; + buf->bo = NULL; + buf->mm = NULL; + nouveau_buffer_allocate(screen, buf, new_domain); + + nv->copy_data(nv, buf->bo, buf->offset, new_domain, + bo, offset, old_domain, buf->base.width0); + + nouveau_bo_ref(NULL, &bo); + if (mm) + release_allocation(&mm, screen->fence.current); + } else + if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) { + if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM)) + return FALSE; + if 
(!nouveau_buffer_upload(nv, buf, 0, buf->base.width0)) + return FALSE; + } else + return FALSE; + + assert(buf->domain == new_domain); + return TRUE; +} + +/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART. + * We'd like to only allocate @size bytes here, but then we'd have to rebase + * the vertex indices ... + */ +boolean +nouveau_user_buffer_upload(struct nv04_resource *buf, + unsigned base, unsigned size) +{ + struct nouveau_screen *screen = nouveau_screen(buf->base.screen); + int ret; + + assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY); + + buf->base.width0 = base + size; + if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART)) + return FALSE; + + ret = nouveau_bo_map_range(buf->bo, buf->offset + base, size, + NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); + if (ret) + return FALSE; + memcpy(buf->bo->map, buf->data + base, size); + nouveau_bo_unmap(buf->bo); + + return TRUE; +} diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h new file mode 100644 index 0000000000..46e3554bdf --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_buffer.h @@ -0,0 +1,139 @@ +#ifndef __NOUVEAU_BUFFER_H__ +#define __NOUVEAU_BUFFER_H__ + +#include "util/u_transfer.h" +#include "util/u_double_list.h" + +struct pipe_resource; +struct nouveau_context; +struct nouveau_bo; + +#define NOUVEAU_BUFFER_SCORE_MIN -25000 +#define NOUVEAU_BUFFER_SCORE_MAX 25000 +#define NOUVEAU_BUFFER_SCORE_VRAM_THRESHOLD 20000 + +/* DIRTY: buffer was (or will be after the next flush) written to by GPU and + * resource->data has not been updated to reflect modified VRAM contents + * + * USER_MEMORY: resource->data is a pointer to client memory and may change + * between GL calls + */ +#define NOUVEAU_BUFFER_STATUS_GPU_READING (1 << 0) +#define NOUVEAU_BUFFER_STATUS_GPU_WRITING (1 << 1) +#define NOUVEAU_BUFFER_STATUS_USER_MEMORY (1 << 7) + +/* Resources, if mapped into the GPU's address space, are guaranteed to + * have constant 
virtual addresses (nv50+). + * + * The address of a resource will lie within the nouveau_bo referenced, + * and this bo should be added to the memory manager's validation list. + */ +struct nv04_resource { + struct pipe_resource base; + const struct u_resource_vtbl *vtbl; + + uint8_t *data; + struct nouveau_bo *bo; + uint32_t offset; + + uint8_t status; + uint8_t domain; + + int16_t score; /* low if mapped very often, if high can move to VRAM */ + + struct nouveau_fence *fence; + struct nouveau_fence *fence_wr; + + struct nouveau_mm_allocation *mm; +}; + +void +nouveau_buffer_release_gpu_storage(struct nv04_resource *); + +boolean +nouveau_buffer_download(struct nouveau_context *, struct nv04_resource *, + unsigned start, unsigned size); + +boolean +nouveau_buffer_migrate(struct nouveau_context *, + struct nv04_resource *, unsigned domain); + +static INLINE void +nouveau_buffer_adjust_score(struct nouveau_context *pipe, + struct nv04_resource *res, int16_t score) +{ + if (score < 0) { + if (res->score > NOUVEAU_BUFFER_SCORE_MIN) + res->score += score; + } else + if (score > 0){ + if (res->score < NOUVEAU_BUFFER_SCORE_MAX) + res->score += score; + if (res->domain == NOUVEAU_BO_GART && + res->score > NOUVEAU_BUFFER_SCORE_VRAM_THRESHOLD) + nouveau_buffer_migrate(pipe, res, NOUVEAU_BO_VRAM); + } +} + +/* XXX: wait for fence (atm only using this for vertex push) */ +static INLINE void * +nouveau_resource_map_offset(struct nouveau_context *pipe, + struct nv04_resource *res, uint32_t offset, + uint32_t flags) +{ + void *map; + + nouveau_buffer_adjust_score(pipe, res, -250); + + if ((res->domain == NOUVEAU_BO_VRAM) && + (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING)) + nouveau_buffer_download(pipe, res, 0, res->base.width0); + + if ((res->domain != NOUVEAU_BO_GART) || + (res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY)) + return res->data + offset; + + if (res->mm) + flags |= NOUVEAU_BO_NOSYNC; + + if (nouveau_bo_map_range(res->bo, res->offset + offset, + 
res->base.width0, flags)) + return NULL; + + map = res->bo->map; + nouveau_bo_unmap(res->bo); + return map; +} + +static INLINE void +nouveau_resource_unmap(struct nv04_resource *res) +{ + /* no-op */ +} + +static INLINE struct nv04_resource * +nv04_resource(struct pipe_resource *resource) +{ + return (struct nv04_resource *)resource; +} + +/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */ +static INLINE boolean +nouveau_resource_mapped_by_gpu(struct pipe_resource *resource) +{ + return nv04_resource(resource)->domain != 0; +} + +struct pipe_resource * +nouveau_buffer_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ); + +struct pipe_resource * +nouveau_user_buffer_create(struct pipe_screen *screen, void *ptr, + unsigned bytes, unsigned usage); + +boolean +nouveau_user_buffer_upload(struct nv04_resource *, unsigned base, + unsigned size); + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h new file mode 100644 index 0000000000..696e0d3f24 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_context.h @@ -0,0 +1,26 @@ +#ifndef __NOUVEAU_CONTEXT_H__ +#define __NOUVEAU_CONTEXT_H__ + +#include "pipe/p_context.h" + +struct nouveau_context { + struct pipe_context pipe; + struct nouveau_screen *screen; + + boolean vbo_dirty; + + void (*copy_data)(struct nouveau_context *, + struct nouveau_bo *dst, unsigned, unsigned, + struct nouveau_bo *src, unsigned, unsigned, unsigned); + void (*push_data)(struct nouveau_context *, + struct nouveau_bo *dst, unsigned, unsigned, + unsigned, void *); +}; + +static INLINE struct nouveau_context * +nouveau_context(struct pipe_context *pipe) +{ + return (struct nouveau_context *)pipe; +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c new file mode 100644 index 0000000000..d8f59dce9e --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_fence.c @@ -0,0 +1,223 @@ 
+/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "util/u_double_list.h" + +#include "nouveau_screen.h" +#include "nouveau_fence.h" + +#include "nouveau/nouveau_pushbuf.h" + +#ifdef PIPE_OS_UNIX +#include <sched.h> +#endif + +boolean +nouveau_fence_new(struct nouveau_screen *screen, struct nouveau_fence **fence, + boolean emit) +{ + *fence = CALLOC_STRUCT(nouveau_fence); + if (!*fence) + return FALSE; + + (*fence)->screen = screen; + (*fence)->ref = 1; + LIST_INITHEAD(&(*fence)->work); + + if (emit) + nouveau_fence_emit(*fence); + + return TRUE; +} + +static void +nouveau_fence_trigger_work(struct nouveau_fence *fence) +{ + struct nouveau_fence_work *work, *tmp; + + LIST_FOR_EACH_ENTRY_SAFE(work, tmp, &fence->work, list) { + work->func(work->data); + LIST_DEL(&work->list); + FREE(work); + } +} + +boolean +nouveau_fence_work(struct nouveau_fence *fence, + void (*func)(void *), void *data) +{ + struct nouveau_fence_work *work; + + if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) { + func(data); + return TRUE; + } + + work = CALLOC_STRUCT(nouveau_fence_work); + if (!work) + return FALSE; + work->func = func; + work->data = data; + LIST_ADD(&work->list, &fence->work); + return TRUE; +} + +void +nouveau_fence_emit(struct nouveau_fence *fence) +{ + struct nouveau_screen *screen = fence->screen; + + fence->sequence = ++screen->fence.sequence; + + assert(fence->state == NOUVEAU_FENCE_STATE_AVAILABLE); + + /* set this now, so that if fence.emit triggers a flush we don't recurse */ + fence->state = NOUVEAU_FENCE_STATE_EMITTED; + + screen->fence.emit(&screen->base, fence->sequence); + + ++fence->ref; + + if (screen->fence.tail) + screen->fence.tail->next = fence; + else + screen->fence.head = fence; + + screen->fence.tail = fence; +} + +void +nouveau_fence_del(struct nouveau_fence *fence) +{ + struct nouveau_fence *it; + struct nouveau_screen *screen = fence->screen; + + if (fence->state == NOUVEAU_FENCE_STATE_EMITTED || + fence->state == NOUVEAU_FENCE_STATE_FLUSHED) { + if (fence == 
screen->fence.head) { + screen->fence.head = fence->next; + if (!screen->fence.head) + screen->fence.tail = NULL; + } else { + for (it = screen->fence.head; it && it->next != fence; it = it->next); + it->next = fence->next; + if (screen->fence.tail == fence) + screen->fence.tail = it; + } + } + + if (!LIST_IS_EMPTY(&fence->work)) { + debug_printf("WARNING: deleting fence with work still pending !\n"); + nouveau_fence_trigger_work(fence); + } + + FREE(fence); +} + +void +nouveau_fence_update(struct nouveau_screen *screen, boolean flushed) +{ + struct nouveau_fence *fence; + struct nouveau_fence *next = NULL; + u32 sequence = screen->fence.update(&screen->base); + + if (screen->fence.sequence_ack == sequence) + return; + screen->fence.sequence_ack = sequence; + + for (fence = screen->fence.head; fence; fence = next) { + next = fence->next; + sequence = fence->sequence; + + fence->state = NOUVEAU_FENCE_STATE_SIGNALLED; + + nouveau_fence_trigger_work(fence); + nouveau_fence_ref(NULL, &fence); + + if (sequence == screen->fence.sequence_ack) + break; + } + screen->fence.head = next; + if (!next) + screen->fence.tail = NULL; + + if (flushed) { + for (fence = next; fence; fence = fence->next) + fence->state = NOUVEAU_FENCE_STATE_FLUSHED; + } +} + +#define NOUVEAU_FENCE_MAX_SPINS (1 << 31) + +boolean +nouveau_fence_signalled(struct nouveau_fence *fence) +{ + struct nouveau_screen *screen = fence->screen; + + if (fence->state >= NOUVEAU_FENCE_STATE_EMITTED) + nouveau_fence_update(screen, FALSE); + + return fence->state == NOUVEAU_FENCE_STATE_SIGNALLED; +} + +boolean +nouveau_fence_wait(struct nouveau_fence *fence) +{ + struct nouveau_screen *screen = fence->screen; + uint32_t spins = 0; + + if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) { + nouveau_fence_emit(fence); + + if (fence == screen->fence.current) + nouveau_fence_new(screen, &screen->fence.current, FALSE); + } + if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED) + FIRE_RING(screen->channel); + + do { + 
nouveau_fence_update(screen, FALSE); + + if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) + return TRUE; + spins++; +#ifdef PIPE_OS_UNIX + if (!(spins % 8)) /* donate a few cycles */ + sched_yield(); +#endif + } while (spins < NOUVEAU_FENCE_MAX_SPINS); + + debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n", + fence->sequence, + screen->fence.sequence_ack, screen->fence.sequence); + + return FALSE; +} + +void +nouveau_fence_next(struct nouveau_screen *screen) +{ + if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTED) + nouveau_fence_emit(screen->fence.current); + + nouveau_fence_new(screen, &screen->fence.current, FALSE); +} diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h b/src/gallium/drivers/nouveau/nouveau_fence.h new file mode 100644 index 0000000000..680c75e99f --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_fence.h @@ -0,0 +1,59 @@ + +#ifndef __NOUVEAU_FENCE_H__ +#define __NOUVEAU_FENCE_H__ + +#include "util/u_inlines.h" +#include "util/u_double_list.h" + +#define NOUVEAU_FENCE_STATE_AVAILABLE 0 +#define NOUVEAU_FENCE_STATE_EMITTED 1 +#define NOUVEAU_FENCE_STATE_FLUSHED 2 +#define NOUVEAU_FENCE_STATE_SIGNALLED 3 + +struct nouveau_fence_work { + struct list_head list; + void (*func)(void *); + void *data; +}; + +struct nouveau_fence { + struct nouveau_fence *next; + struct nouveau_screen *screen; + int state; + int ref; + uint32_t sequence; + struct list_head work; +}; + +void nouveau_fence_emit(struct nouveau_fence *); +void nouveau_fence_del(struct nouveau_fence *); + +boolean nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **, + boolean emit); +boolean nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *); +void nouveau_fence_update(struct nouveau_screen *, boolean flushed); +void nouveau_fence_next(struct nouveau_screen *); +boolean nouveau_fence_wait(struct nouveau_fence *); +boolean nouveau_fence_signalled(struct nouveau_fence *); + +static INLINE void +nouveau_fence_ref(struct 
nouveau_fence *fence, struct nouveau_fence **ref) +{ + if (fence) + ++fence->ref; + + if (*ref) { + if (--(*ref)->ref == 0) + nouveau_fence_del(*ref); + } + + *ref = fence; +} + +static INLINE struct nouveau_fence * +nouveau_fence(struct pipe_fence_handle *fence) +{ + return (struct nouveau_fence *)fence; +} + +#endif // __NOUVEAU_FENCE_H__ diff --git a/src/gallium/drivers/nouveau/nouveau_mm.c b/src/gallium/drivers/nouveau/nouveau_mm.c new file mode 100644 index 0000000000..7edeb4d21d --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_mm.c @@ -0,0 +1,288 @@ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +#include "nouveau_screen.h" +#include "nouveau_mm.h" + +#include "nouveau/nouveau_bo.h" + +#define MM_MIN_ORDER 7 +#define MM_MAX_ORDER 20 + +#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1) + +#define MM_MIN_SIZE (1 << MM_MIN_ORDER) +#define MM_MAX_SIZE (1 << MM_MAX_ORDER) + +struct mm_bucket { + struct list_head free; + struct list_head used; + struct list_head full; + int num_free; +}; + +struct nouveau_mman { + struct nouveau_device *dev; + struct mm_bucket bucket[MM_NUM_BUCKETS]; + uint32_t storage_type; + uint32_t domain; + uint64_t allocated; +}; + +struct mm_slab { + struct list_head head; + struct nouveau_bo *bo; + struct nouveau_mman *cache; + int order; + int count; + int free; + uint32_t bits[0]; +}; + +static int +mm_slab_alloc(struct mm_slab *slab) +{ + int i, n, b; + + if (slab->free == 0) + return -1; + + for (i = 0; i < (slab->count + 31) / 32; ++i) { + b = ffs(slab->bits[i]) - 1; + if (b >= 0) { + n = i * 32 + b; + assert(n < slab->count); + slab->free--; + slab->bits[i] &= ~(1 << b); + return n; + } + } + return -1; +} + +static INLINE void +mm_slab_free(struct mm_slab *slab, int i) +{ + assert(i < slab->count); + slab->bits[i / 32] |= 1 << (i % 32); + slab->free++; + assert(slab->free <= slab->count); +} + +static INLINE int +mm_get_order(uint32_t size) +{ + int s = __builtin_clz(size) 
^ 31; + + if (size > (1 << s)) + s += 1; + return s; +} + +static struct mm_bucket * +mm_bucket_by_order(struct nouveau_mman *cache, int order) +{ + if (order > MM_MAX_ORDER) + return NULL; + return &cache->bucket[MAX2(order, MM_MIN_ORDER) - MM_MIN_ORDER]; +} + +static struct mm_bucket * +mm_bucket_by_size(struct nouveau_mman *cache, unsigned size) +{ + return mm_bucket_by_order(cache, mm_get_order(size)); +} + +/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */ +static INLINE uint32_t +mm_default_slab_size(unsigned chunk_order) +{ + static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = + { + 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22 + }; + + assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); + + return 1 << slab_order[chunk_order - MM_MIN_ORDER]; +} + +static int +mm_slab_new(struct nouveau_mman *cache, int chunk_order) +{ + struct mm_slab *slab; + int words, ret; + const uint32_t size = mm_default_slab_size(chunk_order); + + words = ((size >> chunk_order) + 31) / 32; + assert(words); + + slab = MALLOC(sizeof(struct mm_slab) + words * 4); + if (!slab) + return PIPE_ERROR_OUT_OF_MEMORY; + + memset(&slab->bits[0], ~0, words * 4); + + slab->bo = NULL; + ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, + 0, cache->storage_type, &slab->bo); + if (ret) { + FREE(slab); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + LIST_INITHEAD(&slab->head); + + slab->cache = cache; + slab->order = chunk_order; + slab->count = slab->free = size >> chunk_order; + + LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free); + + cache->allocated += size; + + debug_printf("MM: new slab, total memory = %lu KiB\n", + cache->allocated / 1024); + + return PIPE_OK; +} + +/* @return token to identify slab or NULL if we just allocated a new bo */ +struct nouveau_mm_allocation * +nouveau_mm_allocate(struct nouveau_mman *cache, + uint32_t size, struct nouveau_bo **bo, uint32_t *offset) +{ + struct mm_bucket 
*bucket; + struct mm_slab *slab; + struct nouveau_mm_allocation *alloc; + int ret; + + bucket = mm_bucket_by_size(cache, size); + if (!bucket) { + ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, + 0, cache->storage_type, bo); + if (ret) + debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret); + + *offset = 0; + return NULL; + } + + if (!LIST_IS_EMPTY(&bucket->used)) { + slab = LIST_ENTRY(struct mm_slab, bucket->used.next, head); + } else { + if (LIST_IS_EMPTY(&bucket->free)) { + mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER)); + } + slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head); + + LIST_DEL(&slab->head); + LIST_ADD(&slab->head, &bucket->used); + } + + *offset = mm_slab_alloc(slab) << slab->order; + + alloc = MALLOC_STRUCT(nouveau_mm_allocation); + if (!alloc) + return NULL; + + nouveau_bo_ref(slab->bo, bo); + + if (slab->free == 0) { + LIST_DEL(&slab->head); + LIST_ADD(&slab->head, &bucket->full); + } + + alloc->next = NULL; + alloc->offset = *offset; + alloc->priv = (void *)slab; + + return alloc; +} + +void +nouveau_mm_free(struct nouveau_mm_allocation *alloc) +{ + struct mm_slab *slab = (struct mm_slab *)alloc->priv; + struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order); + + mm_slab_free(slab, alloc->offset >> slab->order); + + if (slab->free == 1) { + LIST_DEL(&slab->head); + + if (slab->count > 1) + LIST_ADDTAIL(&slab->head, &bucket->used); + else + LIST_ADDTAIL(&slab->head, &bucket->free); + } + + FREE(alloc); +} + +void +nouveau_mm_free_work(void *data) +{ + nouveau_mm_free(data); +} + +struct nouveau_mman * +nouveau_mm_create(struct nouveau_device *dev, uint32_t domain, + uint32_t storage_type) +{ + struct nouveau_mman *cache = MALLOC_STRUCT(nouveau_mman); + int i; + + if (!cache) + return NULL; + + cache->dev = dev; + cache->domain = domain; + cache->storage_type = storage_type; + cache->allocated = 0; + + for (i = 0; i < MM_NUM_BUCKETS; ++i) { + 
LIST_INITHEAD(&cache->bucket[i].free); + LIST_INITHEAD(&cache->bucket[i].used); + LIST_INITHEAD(&cache->bucket[i].full); + } + + return cache; +} + +static INLINE void +nouveau_mm_free_slabs(struct list_head *head) +{ + struct mm_slab *slab, *next; + + LIST_FOR_EACH_ENTRY_SAFE(slab, next, head, head) { + LIST_DEL(&slab->head); + nouveau_bo_ref(NULL, &slab->bo); + FREE(slab); + } +} + +void +nouveau_mm_destroy(struct nouveau_mman *cache) +{ + int i; + + if (!cache) + return; + + for (i = 0; i < MM_NUM_BUCKETS; ++i) { + if (!LIST_IS_EMPTY(&cache->bucket[i].used) || + !LIST_IS_EMPTY(&cache->bucket[i].full)) + debug_printf("WARNING: destroying GPU memory cache " + "with some buffers still in use\n"); + + nouveau_mm_free_slabs(&cache->bucket[i].free); + nouveau_mm_free_slabs(&cache->bucket[i].used); + nouveau_mm_free_slabs(&cache->bucket[i].full); + } + + FREE(cache); +} + diff --git a/src/gallium/drivers/nouveau/nouveau_mm.h b/src/gallium/drivers/nouveau/nouveau_mm.h new file mode 100644 index 0000000000..5b57c8ba4f --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_mm.h @@ -0,0 +1,32 @@ +#ifndef __NOUVEAU_MM_H__ +#define __NOUVEAU_MM_H__ + +struct nouveau_mman; + +/* Since a resource can be migrated, we need to decouple allocations from + * them. This struct is linked with fences for delayed freeing of allocs. 
+ */ +struct nouveau_mm_allocation { + struct nouveau_mm_allocation *next; + void *priv; + uint32_t offset; +}; + +extern struct nouveau_mman * +nouveau_mm_create(struct nouveau_device *, uint32_t domain, + uint32_t storage_type); + +extern void +nouveau_mm_destroy(struct nouveau_mman *); + +extern struct nouveau_mm_allocation * +nouveau_mm_allocate(struct nouveau_mman *, uint32_t size, + struct nouveau_bo **, uint32_t *offset); + +extern void +nouveau_mm_free(struct nouveau_mm_allocation *); + +extern void +nouveau_mm_free_work(void *); + +#endif // __NOUVEAU_MM_H__ diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index a9426df686..4cd82a51c1 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -14,6 +14,7 @@ #include "nouveau/nouveau_bo.h" #include "nouveau_winsys.h" #include "nouveau_screen.h" +#include "nouveau_fence.h" /* XXX this should go away */ #include "state_tracker/drm_driver.h" @@ -150,23 +151,22 @@ nouveau_screen_fence_ref(struct pipe_screen *pscreen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *pfence) { - *ptr = pfence; + nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr); } -static int +static boolean nouveau_screen_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *pfence, - unsigned flags) + struct pipe_fence_handle *pfence) { - return 0; + return nouveau_fence_signalled(nouveau_fence(pfence)); } -static int +static boolean nouveau_screen_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *pfence, - unsigned flags) + uint64_t timeout) { - return 0; + return nouveau_fence_wait(nouveau_fence(pfence)); } @@ -209,26 +209,6 @@ nouveau_screen_bo_get_handle(struct pipe_screen *pscreen, } } - -unsigned int -nouveau_reference_flags(struct nouveau_bo *bo) -{ - uint32_t bo_flags; - int flags = 0; - - bo_flags = nouveau_bo_pending(bo); - if (bo_flags & NOUVEAU_BO_RD) - flags |= 
PIPE_REFERENCED_FOR_READ; - if (bo_flags & NOUVEAU_BO_WR) - flags |= PIPE_REFERENCED_FOR_WRITE; - - return flags; -} - - - - - int nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) { @@ -250,6 +230,10 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) util_format_s3tc_init(); + screen->mm_GART = nouveau_mm_create(dev, + NOUVEAU_BO_GART | NOUVEAU_BO_MAP, + 0x000); + screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0x000); return 0; } @@ -257,7 +241,12 @@ void nouveau_screen_fini(struct nouveau_screen *screen) { struct pipe_winsys *ws = screen->base.winsys; + + nouveau_mm_destroy(screen->mm_GART); + nouveau_mm_destroy(screen->mm_VRAM); + nouveau_channel_free(&screen->channel); + if (ws) ws->destroy(ws); } diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index 8c290273fb..186ada3967 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -2,6 +2,10 @@ #define __NOUVEAU_SCREEN_H__ #include "pipe/p_screen.h" +#include "util/u_memory.h" +typedef uint32_t u32; + +struct nouveau_bo; struct nouveau_screen { struct pipe_screen base; @@ -12,6 +16,20 @@ struct nouveau_screen { * these almost always should be set to the same value */ unsigned vertex_buffer_flags; unsigned index_buffer_flags; + unsigned sysmem_bindings; + + struct { + struct nouveau_fence *head; + struct nouveau_fence *tail; + struct nouveau_fence *current; + u32 sequence; + u32 sequence_ack; + void (*emit)(struct pipe_screen *, u32 sequence); + u32 (*update)(struct pipe_screen *); + } fence; + + struct nouveau_mman *mm_VRAM; + struct nouveau_mman *mm_GART; }; static INLINE struct nouveau_screen * @@ -56,17 +74,13 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle, unsigned *out_stride); -unsigned int -nouveau_reference_flags(struct nouveau_bo *bo); - - int nouveau_screen_init(struct 
nouveau_screen *, struct nouveau_device *); void nouveau_screen_fini(struct nouveau_screen *); - +#ifndef NOUVEAU_NVC0 static INLINE unsigned RING_3D(unsigned mthd, unsigned size) { @@ -78,5 +92,6 @@ RING_3D_NI(unsigned mthd, unsigned size) { return 0x40000000 | (7 << 13) | (size << 18) | mthd; } +#endif #endif diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h deleted file mode 100644 index e920cf9f3b..0000000000 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ /dev/null @@ -1,316 +0,0 @@ -#ifndef __NOUVEAU_STATEOBJ_H__ -#define __NOUVEAU_STATEOBJ_H__ - -#include "util/u_debug.h" - -#ifdef DEBUG -#define DEBUG_NOUVEAU_STATEOBJ -#endif /* DEBUG */ - -struct nouveau_stateobj_reloc { - struct nouveau_bo *bo; - - struct nouveau_grobj *gr; - uint32_t push_offset; - uint32_t mthd; - - uint32_t data; - unsigned flags; - unsigned vor; - unsigned tor; -}; - -struct nouveau_stateobj_start { - struct nouveau_grobj *gr; - uint32_t mthd; - uint32_t size; - unsigned offset; -}; - -struct nouveau_stateobj { - struct pipe_reference reference; - - struct nouveau_stateobj_start *start; - struct nouveau_stateobj_reloc *reloc; - - /* Common memory pool for data. 
*/ - uint32_t *pool; - unsigned pool_cur; - -#ifdef DEBUG_NOUVEAU_STATEOBJ - unsigned start_alloc; - unsigned reloc_alloc; - unsigned pool_alloc; -#endif /* DEBUG_NOUVEAU_STATEOBJ */ - - unsigned total; /* includes begin_ring */ - unsigned cur; /* excludes begin_ring, offset from "cur_start" */ - unsigned cur_start; - unsigned cur_reloc; -}; - -static INLINE void -so_dump(struct nouveau_stateobj *so) -{ - unsigned i, nr, total = 0; - - for (i = 0; i < so->cur_start; i++) { - if (so->start[i].gr->subc > -1) - debug_printf("+0x%04x: 0x%08x\n", total++, - (so->start[i].size << 18) | (so->start[i].gr->subc << 13) - | so->start[i].mthd); - else - debug_printf("+0x%04x: 0x%08x\n", total++, - (so->start[i].size << 18) | so->start[i].mthd); - for (nr = 0; nr < so->start[i].size; nr++, total++) - debug_printf("+0x%04x: 0x%08x\n", total, - so->pool[so->start[i].offset + nr]); - } -} - -static INLINE struct nouveau_stateobj * -so_new(unsigned start, unsigned push, unsigned reloc) -{ - struct nouveau_stateobj *so; - - so = MALLOC(sizeof(struct nouveau_stateobj)); - pipe_reference_init(&so->reference, 1); - so->total = so->cur = so->cur_start = so->cur_reloc = 0; - -#ifdef DEBUG_NOUVEAU_STATEOBJ - so->start_alloc = start; - so->reloc_alloc = reloc; - so->pool_alloc = push; -#endif /* DEBUG_NOUVEAU_STATEOBJ */ - - so->start = MALLOC(start * sizeof(struct nouveau_stateobj_start)); - so->reloc = MALLOC(reloc * sizeof(struct nouveau_stateobj_reloc)); - so->pool = MALLOC(push * sizeof(uint32_t)); - so->pool_cur = 0; - - if (!so->start || !so->reloc || !so->pool) { - debug_printf("malloc failed\n"); - assert(0); - } - - return so; -} - -static INLINE void -so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) -{ - struct nouveau_stateobj *so = *pso; - int i; - - if (pipe_reference(&(*pso)->reference, &ref->reference)) { - FREE(so->start); - for (i = 0; i < so->cur_reloc; i++) - nouveau_bo_ref(NULL, &so->reloc[i].bo); - FREE(so->reloc); - FREE(so->pool); - FREE(so); - } 
- *pso = ref; -} - -static INLINE void -so_data(struct nouveau_stateobj *so, uint32_t data) -{ -#ifdef DEBUG_NOUVEAU_STATEOBJ - if (so->cur >= so->start[so->cur_start - 1].size) { - debug_printf("exceeding specified size\n"); - assert(0); - } -#endif /* DEBUG_NOUVEAU_STATEOBJ */ - - so->pool[so->start[so->cur_start - 1].offset + so->cur++] = data; -} - -static INLINE void -so_datap(struct nouveau_stateobj *so, uint32_t *data, unsigned size) -{ -#ifdef DEBUG_NOUVEAU_STATEOBJ - if ((so->cur + size) > so->start[so->cur_start - 1].size) { - debug_printf("exceeding specified size\n"); - assert(0); - } -#endif /* DEBUG_NOUVEAU_STATEOBJ */ - - while (size--) - so->pool[so->start[so->cur_start - 1].offset + so->cur++] = - *data++; -} - -static INLINE void -so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, - unsigned mthd, unsigned size) -{ - struct nouveau_stateobj_start *start; - -#ifdef DEBUG_NOUVEAU_STATEOBJ - if (so->start_alloc <= so->cur_start) { - debug_printf("exceeding num_start size\n"); - assert(0); - } -#endif /* DEBUG_NOUVEAU_STATEOBJ */ - start = so->start; - -#ifdef DEBUG_NOUVEAU_STATEOBJ - if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) { - debug_printf("previous so_method was not filled\n"); - assert(0); - } -#endif /* DEBUG_NOUVEAU_STATEOBJ */ - - start[so->cur_start].gr = gr; - start[so->cur_start].mthd = mthd; - start[so->cur_start].size = size; - -#ifdef DEBUG_NOUVEAU_STATEOBJ - if (so->pool_alloc < (size + so->pool_cur)) { - debug_printf("exceeding num_pool size\n"); - assert(0); - } -#endif /* DEBUG_NOUVEAU_STATEOBJ */ - - start[so->cur_start].offset = so->pool_cur; - so->pool_cur += size; - - so->cur_start++; - /* The 1 is for *this* begin_ring. 
*/ - so->total += so->cur + 1; - so->cur = 0; -} - -static INLINE void -so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo, - unsigned data, unsigned flags, unsigned vor, unsigned tor) -{ - struct nouveau_stateobj_reloc *r; - -#ifdef DEBUG_NOUVEAU_STATEOBJ - if (so->reloc_alloc <= so->cur_reloc) { - debug_printf("exceeding num_reloc size\n"); - assert(0); - } -#endif /* DEBUG_NOUVEAU_STATEOBJ */ - r = so->reloc; - - r[so->cur_reloc].bo = NULL; - nouveau_bo_ref(bo, &(r[so->cur_reloc].bo)); - r[so->cur_reloc].gr = so->start[so->cur_start-1].gr; - r[so->cur_reloc].push_offset = so->total + so->cur; - r[so->cur_reloc].data = data; - r[so->cur_reloc].flags = flags; - r[so->cur_reloc].mthd = so->start[so->cur_start-1].mthd + - (so->cur << 2); - r[so->cur_reloc].vor = vor; - r[so->cur_reloc].tor = tor; - - so_data(so, data); - so->cur_reloc++; -} - -/* Determine if this buffer object is referenced by this state object. */ -static INLINE boolean -so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo) -{ - int i; - - for (i = 0; i < so->cur_reloc; i++) - if (so->reloc[i].bo == bo) - return true; - - return false; -} - -static INLINE void -so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so) -{ - unsigned nr, i; - int ret = 0; - -#ifdef DEBUG_NOUVEAU_STATEOBJ - if (so->start[so->cur_start - 1].size > so->cur) { - debug_printf("emit: previous so_method was not filled\n"); - assert(0); - } -#endif /* DEBUG_NOUVEAU_STATEOBJ */ - - /* We cannot update total in case we so_emit again. */ - nr = so->total + so->cur; - - /* This will flush if we need space. - * We don't actually need the marker. - */ - if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) { - debug_printf("so_emit failed marker emit with error %d\n", ret); - assert(0); - } - - /* Submit data. This will ensure proper binding of objects. 
*/ - for (i = 0; i < so->cur_start; i++) { - BEGIN_RING(chan, so->start[i].gr, so->start[i].mthd, so->start[i].size); - OUT_RINGp(chan, &(so->pool[so->start[i].offset]), so->start[i].size); - } - - for (i = 0; i < so->cur_reloc; i++) { - struct nouveau_stateobj_reloc *r = &so->reloc[i]; - - if ((ret = nouveau_pushbuf_emit_reloc(chan, chan->cur - nr + - r->push_offset, r->bo, r->data, - 0, r->flags, r->vor, r->tor))) { - debug_printf("so_emit failed reloc with error %d\n", ret); - assert(0); - } - } -} - -static INLINE void -so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so) -{ - unsigned i; - int ret = 0; - - if (!so) - return; - - /* If we need to flush in flush notify, then we have a problem anyway. */ - for (i = 0; i < so->cur_reloc; i++) { - struct nouveau_stateobj_reloc *r = &so->reloc[i]; - -#ifdef DEBUG_NOUVEAU_STATEOBJ - if (r->mthd & 0x40000000) { - debug_printf("error: NI mthd 0x%08X\n", r->mthd); - continue; - } -#endif /* DEBUG_NOUVEAU_STATEOBJ */ - - /* We don't need to autobind, since there are enough subchannels - * for all objects we use. If this is changed, account for the extra - * space in callers of this function. - */ - assert(r->gr->bound != NOUVEAU_GROBJ_UNBOUND); - - /* Some relocs really don't like to be hammered, - * NOUVEAU_BO_DUMMY makes sure it only - * happens when needed. 
- */ - ret = OUT_RELOC(chan, r->bo, (r->gr->subc << 13) | (1<< 18) | - r->mthd, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART - | NOUVEAU_BO_RDWR)) | NOUVEAU_BO_DUMMY, 0, 0); - if (ret) { - debug_printf("OUT_RELOC failed %d\n", ret); - assert(0); - } - - ret = OUT_RELOC(chan, r->bo, r->data, r->flags | - NOUVEAU_BO_DUMMY, r->vor, r->tor); - if (ret) { - debug_printf("OUT_RELOC failed %d\n", ret); - assert(0); - } - } -} - -#endif diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index ab480cabd0..484f870bd8 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -9,8 +9,9 @@ #include "nouveau/nouveau_device.h" #include "nouveau/nouveau_grobj.h" #include "nouveau/nouveau_notifier.h" -#include "nouveau/nouveau_resource.h" -#include "nouveau/nouveau_pushbuf.h" +#ifndef NOUVEAU_NVC0 +#include "nouveau/nv04_pushbuf.h" +#endif #ifndef NV04_PFIFO_MAX_PACKET_LEN #define NV04_PFIFO_MAX_PACKET_LEN 2047 @@ -41,4 +42,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *); extern struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *); +extern struct pipe_screen * +nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *); + #endif diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h index cb7653c3fe..a5b0d0478c 100644 --- a/src/gallium/drivers/nouveau/nv_object.xml.h +++ b/src/gallium/drivers/nouveau/nv_object.xml.h @@ -8,12 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- nv30-40_3d.xml ( 31709 bytes, from 2010-09-05 07:53:14) -- copyright.xml ( 6503 bytes, from 2010-04-10 23:15:50) -- nv_3ddefs.xml ( 15193 bytes, from 2010-09-05 07:50:15) -- nv_defs.xml ( 4437 bytes, from 2010-08-05 19:38:53) -- nv_object.xml ( 10424 bytes, from 2010-08-05 19:38:53) 
-- nvchipsets.xml ( 2824 bytes, from 2010-08-05 19:38:53) +- nv_object.xml ( 11547 bytes, from 2010-10-24 15:29:34) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) Copyright (C) 2006-2010 by the following authors: - Artur Huillet <arthur.huillet@free.fr> (ahuillet) @@ -37,7 +35,7 @@ Copyright (C) 2006-2010 by the following authors: - Mark Carey <mark.carey@gmail.com> (careym) - Matthieu Castet <matthieu.castet@parrot.com> (mat-c) - nvidiaman <nvidiaman@users.sf.net> (nvidiaman) -- Patrice Mandin <mandin.patrice@orange.fr> (pmandin, pmdata) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) - Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) - Peter Popov <ironpeter@users.sf.net> (ironpeter) - Richard Hughes <hughsient@users.sf.net> (hughsient) @@ -180,6 +178,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_COMPUTE 0x000050c0 #define NVA3_COMPUTE 0x000085c0 #define NVC0_COMPUTE 0x000090c0 +#define NV84_CRYPT 0x000074c1 #define NV01_SUBCHAN__SIZE 0x00002000 #define NV01_SUBCHAN 0x00000000 @@ -194,9 +193,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV84_SUBCHAN_QUERY_GET 0x0000001c -#define NV84_SUBCHAN_UNK20 0x00000020 +#define NV84_SUBCHAN_QUERY_INTR 0x00000020 -#define NV84_SUBCHAN_UNK24 0x00000024 +#define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024 #define NV10_SUBCHAN_REF_CNT 0x00000050 @@ -209,7 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV11_SUBCHAN_SEMAPHORE_RELEASE 0x0000006c -#define NV50_SUBCHAN_UNK80 0x00000080 +#define NV40_SUBCHAN_YIELD 0x00000080 #define NV01_GRAPH 0x00000000 @@ -227,5 +226,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NV40_GRAPH_PM_TRIGGER 0x00000140 +#define NVC0_SUBCHAN__SIZE 0x00008000 +#define NVC0_SUBCHAN 0x00000000 + +#define NVC0_SUBCHAN_OBJECT 0x00000000 + + +#define NVC0_SUBCHAN_QUERY_ADDRESS_HIGH 0x00000010 + +#define NVC0_SUBCHAN_QUERY_ADDRESS_LOW 0x00000014 + +#define NVC0_SUBCHAN_QUERY_SEQUENCE 0x00000018 + +#define NVC0_SUBCHAN_QUERY_GET 0x0000001c + +#define NVC0_SUBCHAN_REF_CNT 0x00000050 + +#define NVC0_GRAPH 0x00000000 + +#define NVC0_GRAPH_NOP 0x00000100 + +#define NVC0_GRAPH_NOTIFY_ADDRESS_HIGH 0x00000104 + +#define NVC0_GRAPH_NOTIFY_ADDRESS_LOW 0x00000108 + +#define NVC0_GRAPH_NOTIFY 0x0000010c +#define NVC0_GRAPH_NOTIFY_WRITE 0x00000000 +#define NVC0_GRAPH_NOTIFY_WRITE_AND_AWAKEN 0x00000001 + +#define NVC0_GRAPH_SERIALIZE 0x00000110 + +#define NVC0_GRAPH_MACRO_UPLOAD_POS 0x00000114 + +#define NVC0_GRAPH_MACRO_UPLOAD_DATA 0x00000118 + +#define NVC0_GRAPH_MACRO_ID 0x0000011c + +#define NVC0_GRAPH_MACRO_POS 0x00000120 + #endif /* NV_OBJECT_XML */ diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile index bf1e8201a0..02bcc26cfb 100644 --- a/src/gallium/drivers/nv50/Makefile +++ b/src/gallium/drivers/nv50/Makefile @@ -4,13 +4,10 @@ include $(TOP)/configs/current LIBNAME = nv50 C_SOURCES = \ - nv50_buffer.c \ - nv50_clear.c \ nv50_context.c \ nv50_draw.c \ nv50_formats.c \ nv50_miptree.c \ - nv50_query.c \ nv50_resource.c \ nv50_screen.c \ nv50_state.c \ @@ -19,7 +16,6 @@ C_SOURCES = \ nv50_tex.c \ nv50_transfer.c \ nv50_vbo.c \ - nv50_push.c \ nv50_program.c \ nv50_shader_state.c \ nv50_pc.c \ @@ -27,6 +23,11 @@ C_SOURCES = \ nv50_pc_emit.c \ nv50_tgsi_to_nc.c \ nv50_pc_optimize.c \ - nv50_pc_regalloc.c + nv50_pc_regalloc.c \ + nv50_push.c \ + nv50_query.c + +LIBRARY_INCLUDES = \ + $(LIBDRM_CFLAGS) include ../../Makefile.template diff --git a/src/gallium/drivers/nv50/SConscript b/src/gallium/drivers/nv50/SConscript index 8e7892a9ab..3c8a7276b9 100644 --- a/src/gallium/drivers/nv50/SConscript +++ 
b/src/gallium/drivers/nv50/SConscript @@ -5,13 +5,10 @@ env = env.Clone() nv50 = env.ConvenienceLibrary( target = 'nv50', source = [ - 'nv50_buffer.c', - 'nv50_clear.c', 'nv50_context.c', 'nv50_draw.c', 'nv50_formats.c', 'nv50_miptree.c', - 'nv50_query.c', 'nv50_resource.c', 'nv50_screen.c', 'nv50_state.c', @@ -20,7 +17,6 @@ nv50 = env.ConvenienceLibrary( 'nv50_tex.c', 'nv50_transfer.c', 'nv50_vbo.c', - 'nv50_push.c', 'nv50_program.c', 'nv50_shader_state.c', 'nv50_pc.c', @@ -29,6 +25,8 @@ nv50 = env.ConvenienceLibrary( 'nv50_tgsi_to_nc.c', 'nv50_pc_optimize.c', 'nv50_pc_regalloc.c', + 'nv50_push.c', + 'nv50_query.c' ]) Export('nv50') diff --git a/src/gallium/drivers/nv50/nv50_2d.xml.h b/src/gallium/drivers/nv50/nv50_2d.xml.h new file mode 100644 index 0000000000..bc9bcf7839 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_2d.xml.h @@ -0,0 +1,393 @@ +#ifndef NV50_2D_XML +#define NV50_2D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_2d.xml ( 9799 bytes, from 2010-12-28 17:17:11) +- copyright.xml ( 6452 bytes, from 2010-12-15 23:45:18) +- nv_object.xml ( 11898 bytes, from 2010-12-28 17:17:11) +- nvchipsets.xml ( 3074 bytes, from 2010-12-15 23:45:18) +- nv_defs.xml ( 4437 bytes, from 2010-12-15 23:45:18) +- nv50_defs.xml ( 4487 bytes, from 2010-12-15 23:45:18) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the 
Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + + +#define NV50_2D_DMA_NOTIFY 0x00000180 + +#define NV50_2D_DMA_DST 0x00000184 + +#define NV50_2D_DMA_SRC 0x00000188 + +#define NV50_2D_DMA_COND 0x0000018c + +#define NV50_2D_DST_FORMAT 0x00000200 + +#define NV50_2D_DST_LINEAR 0x00000204 + +#define NV50_2D_DST_TILE_MODE 0x00000208 + +#define NV50_2D_DST_DEPTH 0x0000020c + +#define NV50_2D_DST_LAYER 0x00000210 + +#define NV50_2D_DST_PITCH 0x00000214 + +#define NV50_2D_DST_WIDTH 0x00000218 + +#define NV50_2D_DST_HEIGHT 0x0000021c + +#define NV50_2D_DST_ADDRESS_HIGH 0x00000220 + +#define NV50_2D_DST_ADDRESS_LOW 0x00000224 + +#define NV50_2D_UNK228 0x00000228 + +#define NV50_2D_SRC_FORMAT 0x00000230 + +#define NV50_2D_SRC_LINEAR 0x00000234 + +#define NV50_2D_SRC_TILE_MODE 0x00000238 + +#define NV50_2D_SRC_DEPTH 0x0000023c + +#define NV50_2D_SRC_LAYER 0x00000240 + +#define NV50_2D_SRC_PITCH 0x00000244 +#define NV50_2D_SRC_PITCH__MAX 0x00040000 + +#define NV50_2D_SRC_WIDTH 0x00000248 +#define NV50_2D_SRC_WIDTH__MAX 0x00010000 + +#define NV50_2D_SRC_HEIGHT 0x0000024c +#define NV50_2D_SRC_HEIGHT__MAX 0x00010000 + +#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250 + +#define NV50_2D_SRC_ADDRESS_LOW 0x00000254 + +#define NV50_2D_UNK258 0x00000258 + +#define NV50_2D_UNK260 0x00000260 + +#define NV50_2D_COND_ADDRESS_HIGH 
0x00000264 + +#define NV50_2D_COND_ADDRESS_LOW 0x00000268 + +#define NV50_2D_COND_MODE 0x0000026c +#define NV50_2D_COND_MODE_NEVER 0x00000000 +#define NV50_2D_COND_MODE_ALWAYS 0x00000001 +#define NV50_2D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NV50_2D_COND_MODE_EQUAL 0x00000003 +#define NV50_2D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NV50_2D_CLIP_X 0x00000280 + +#define NV50_2D_CLIP_Y 0x00000284 + +#define NV50_2D_CLIP_W 0x00000288 + +#define NV50_2D_CLIP_H 0x0000028c + +#define NV50_2D_CLIP_ENABLE 0x00000290 + +#define NV50_2D_COLOR_KEY_FORMAT 0x00000294 +#define NV50_2D_COLOR_KEY_FORMAT_16BPP 0x00000000 +#define NV50_2D_COLOR_KEY_FORMAT_15BPP 0x00000001 +#define NV50_2D_COLOR_KEY_FORMAT_24BPP 0x00000002 +#define NV50_2D_COLOR_KEY_FORMAT_30BPP 0x00000003 +#define NV50_2D_COLOR_KEY_FORMAT_8BPP 0x00000004 +#define NV50_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005 +#define NV50_2D_COLOR_KEY_FORMAT_32BPP 0x00000006 + +#define NV50_2D_COLOR_KEY 0x00000298 + +#define NV50_2D_COLOR_KEY_ENABLE 0x0000029c + +#define NV50_2D_ROP 0x000002a0 + +#define NV50_2D_BETA1 0x000002a4 + +#define NV50_2D_BETA4 0x000002a8 + +#define NV50_2D_OPERATION 0x000002ac +#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000 +#define NV50_2D_OPERATION_ROP_AND 0x00000001 +#define NV50_2D_OPERATION_BLEND_AND 0x00000002 +#define NV50_2D_OPERATION_SRCCOPY 0x00000003 +#define NV50_2D_OPERATION_UNK4 0x00000004 +#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000005 +#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000006 + +#define NV50_2D_UNK2B0 0x000002b0 +#define NV50_2D_UNK2B0_UNK0__MASK 0x0000003f +#define NV50_2D_UNK2B0_UNK0__SHIFT 0 +#define NV50_2D_UNK2B0_UNK1__MASK 0x00003f00 +#define NV50_2D_UNK2B0_UNK1__SHIFT 8 + +#define NV50_2D_PATTERN_SELECT 0x000002b4 +#define NV50_2D_PATTERN_SELECT_MONO_8X8 0x00000000 +#define NV50_2D_PATTERN_SELECT_MONO_64X1 0x00000001 +#define NV50_2D_PATTERN_SELECT_MONO_1X64 0x00000002 +#define NV50_2D_PATTERN_SELECT_COLOR 0x00000003 + +#define NV50_2D_PATTERN_COLOR_FORMAT 
0x000002e8 +#define NV50_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000 +#define NV50_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001 +#define NV50_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002 +#define NV50_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003 +#define NV50_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004 +#define NV50_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005 + +#define NV50_2D_PATTERN_MONO_FORMAT 0x000002ec +#define NV50_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000 +#define NV50_2D_PATTERN_MONO_FORMAT_LE 0x00000001 + +#define NV50_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0)) +#define NV50_2D_PATTERN_COLOR__ESIZE 0x00000004 +#define NV50_2D_PATTERN_COLOR__LEN 0x00000002 + +#define NV50_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0)) +#define NV50_2D_PATTERN_BITMAP__ESIZE 0x00000004 +#define NV50_2D_PATTERN_BITMAP__LEN 0x00000002 + +#define NV50_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0)) +#define NV50_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004 +#define NV50_2D_PATTERN_X8R8G8B8__LEN 0x00000040 +#define NV50_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff +#define NV50_2D_PATTERN_X8R8G8B8_B__SHIFT 0 +#define NV50_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00 +#define NV50_2D_PATTERN_X8R8G8B8_G__SHIFT 8 +#define NV50_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000 +#define NV50_2D_PATTERN_X8R8G8B8_R__SHIFT 16 + +#define NV50_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0)) +#define NV50_2D_PATTERN_R5G6B5__ESIZE 0x00000004 +#define NV50_2D_PATTERN_R5G6B5__LEN 0x00000020 +#define NV50_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f +#define NV50_2D_PATTERN_R5G6B5_B0__SHIFT 0 +#define NV50_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0 +#define NV50_2D_PATTERN_R5G6B5_G0__SHIFT 5 +#define NV50_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800 +#define NV50_2D_PATTERN_R5G6B5_R0__SHIFT 11 +#define NV50_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000 +#define NV50_2D_PATTERN_R5G6B5_B1__SHIFT 16 +#define NV50_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000 +#define NV50_2D_PATTERN_R5G6B5_G1__SHIFT 21 +#define NV50_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000 +#define 
NV50_2D_PATTERN_R5G6B5_R1__SHIFT 27 + +#define NV50_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0)) +#define NV50_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004 +#define NV50_2D_PATTERN_X1R5G5B5__LEN 0x00000020 +#define NV50_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f +#define NV50_2D_PATTERN_X1R5G5B5_B0__SHIFT 0 +#define NV50_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0 +#define NV50_2D_PATTERN_X1R5G5B5_G0__SHIFT 5 +#define NV50_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00 +#define NV50_2D_PATTERN_X1R5G5B5_R0__SHIFT 10 +#define NV50_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000 +#define NV50_2D_PATTERN_X1R5G5B5_B1__SHIFT 16 +#define NV50_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000 +#define NV50_2D_PATTERN_X1R5G5B5_G1__SHIFT 21 +#define NV50_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000 +#define NV50_2D_PATTERN_X1R5G5B5_R1__SHIFT 26 + +#define NV50_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0)) +#define NV50_2D_PATTERN_Y8__ESIZE 0x00000004 +#define NV50_2D_PATTERN_Y8__LEN 0x00000010 +#define NV50_2D_PATTERN_Y8_Y0__MASK 0x000000ff +#define NV50_2D_PATTERN_Y8_Y0__SHIFT 0 +#define NV50_2D_PATTERN_Y8_Y1__MASK 0x0000ff00 +#define NV50_2D_PATTERN_Y8_Y1__SHIFT 8 +#define NV50_2D_PATTERN_Y8_Y2__MASK 0x00ff0000 +#define NV50_2D_PATTERN_Y8_Y2__SHIFT 16 +#define NV50_2D_PATTERN_Y8_Y3__MASK 0xff000000 +#define NV50_2D_PATTERN_Y8_Y3__SHIFT 24 + +#define NV50_2D_DRAW_SHAPE 0x00000580 +#define NV50_2D_DRAW_SHAPE_POINTS 0x00000000 +#define NV50_2D_DRAW_SHAPE_LINES 0x00000001 +#define NV50_2D_DRAW_SHAPE_LINE_STRIP 0x00000002 +#define NV50_2D_DRAW_SHAPE_TRIANGLES 0x00000003 +#define NV50_2D_DRAW_SHAPE_RECTANGLES 0x00000004 + +#define NV50_2D_DRAW_COLOR_FORMAT 0x00000584 + +#define NV50_2D_DRAW_COLOR 0x00000588 + +#define NV50_2D_UNK58C 0x0000058c +#define NV50_2D_UNK58C_0 0x00000001 +#define NV50_2D_UNK58C_1 0x00000010 +#define NV50_2D_UNK58C_2 0x00000100 +#define NV50_2D_UNK58C_3 0x00001000 + +#define NV50_2D_DRAW_POINT16 0x000005e0 +#define NV50_2D_DRAW_POINT16_X__MASK 0x0000ffff +#define NV50_2D_DRAW_POINT16_X__SHIFT 0 
+#define NV50_2D_DRAW_POINT16_Y__MASK 0xffff0000 +#define NV50_2D_DRAW_POINT16_Y__SHIFT 16 + +#define NV50_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0)) +#define NV50_2D_DRAW_POINT32_X__ESIZE 0x00000008 +#define NV50_2D_DRAW_POINT32_X__LEN 0x00000040 + +#define NV50_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0)) +#define NV50_2D_DRAW_POINT32_Y__ESIZE 0x00000008 +#define NV50_2D_DRAW_POINT32_Y__LEN 0x00000040 + +#define NV50_2D_SIFC_BITMAP_ENABLE 0x00000800 + +#define NV50_2D_SIFC_FORMAT 0x00000804 + +#define NV50_2D_SIFC_BITMAP_FORMAT 0x00000808 +#define NV50_2D_SIFC_BITMAP_FORMAT_I1 0x00000000 +#define NV50_2D_SIFC_BITMAP_FORMAT_I4 0x00000001 +#define NV50_2D_SIFC_BITMAP_FORMAT_I8 0x00000002 + +#define NV50_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c + +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810 +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000 +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001 +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002 + +#define NV50_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814 + +#define NV50_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818 + +#define NV50_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c + +#define NV50_2D_SIFC_WIDTH 0x00000838 + +#define NV50_2D_SIFC_HEIGHT 0x0000083c + +#define NV50_2D_SIFC_DX_DU_FRACT 0x00000840 + +#define NV50_2D_SIFC_DX_DU_INT 0x00000844 + +#define NV50_2D_SIFC_DY_DV_FRACT 0x00000848 + +#define NV50_2D_SIFC_DY_DV_INT 0x0000084c + +#define NV50_2D_SIFC_DST_X_FRACT 0x00000850 + +#define NV50_2D_SIFC_DST_X_INT 0x00000854 + +#define NV50_2D_SIFC_DST_Y_FRACT 0x00000858 + +#define NV50_2D_SIFC_DST_Y_INT 0x0000085c + +#define NV50_2D_SIFC_DATA 0x00000860 + +#define NV50_2D_UNK0870 0x00000870 + +#define NV50_2D_UNK0880 0x00000880 + +#define NV50_2D_UNK0884 0x00000884 + +#define NV50_2D_UNK0888 0x00000888 + +#define NV50_2D_BLIT_CONTROL 0x0000088c +#define NV50_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001 +#define NV50_2D_BLIT_CONTROL_ORIGIN__SHIFT 0 +#define NV50_2D_BLIT_CONTROL_ORIGIN_CENTER 
0x00000000 +#define NV50_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001 +#define NV50_2D_BLIT_CONTROL_FILTER__MASK 0x00000010 +#define NV50_2D_BLIT_CONTROL_FILTER__SHIFT 4 +#define NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000 +#define NV50_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010 + +#define NV50_2D_BLIT_DST_X 0x000008b0 + +#define NV50_2D_BLIT_DST_Y 0x000008b4 + +#define NV50_2D_BLIT_DST_W 0x000008b8 + +#define NV50_2D_BLIT_DST_H 0x000008bc + +#define NV50_2D_BLIT_DU_DX_FRACT 0x000008c0 + +#define NV50_2D_BLIT_DU_DX_INT 0x000008c4 + +#define NV50_2D_BLIT_DV_DY_FRACT 0x000008c8 + +#define NV50_2D_BLIT_DV_DY_INT 0x000008cc + +#define NV50_2D_BLIT_SRC_X_FRACT 0x000008d0 + +#define NV50_2D_BLIT_SRC_X_INT 0x000008d4 + +#define NV50_2D_BLIT_SRC_Y_FRACT 0x000008d8 + +#define NV50_2D_BLIT_SRC_Y_INT 0x000008dc + +#define NVC0_2D_FIRMWARE(i0) (0x000008e0 + 0x4*(i0)) +#define NVC0_2D_FIRMWARE__ESIZE 0x00000004 +#define NVC0_2D_FIRMWARE__LEN 0x00000020 + + +#endif /* NV50_2D_XML */ diff --git a/src/gallium/drivers/nv50/nv50_3d.xml.h b/src/gallium/drivers/nv50/nv50_3d.xml.h new file mode 100644 index 0000000000..9bb3211728 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_3d.xml.h @@ -0,0 +1,2084 @@ +#ifndef NV50_3D_XML +#define NV50_3D_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_3d.xml ( 64479 bytes, from 2011-02-27 17:58:08) +- copyright.xml ( 6452 bytes, from 2010-12-15 23:45:18) +- nv_defs.xml ( 4437 bytes, from 2010-12-15 23:45:18) +- nv50_defs.xml ( 4487 bytes, from 2010-12-15 23:45:18) +- nv_3ddefs.xml ( 16394 bytes, from 2010-12-15 23:45:18) +- nv_object.xml ( 12191 bytes, from 2011-02-27 17:58:08) +- nvchipsets.xml ( 3074 bytes, from 2011-02-27 17:58:08) + +Copyright (C) 2006-2011 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> 
(cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +#define NV50_3D_SERIALIZE 0x00000110 + +#define NV50_3D_DMA_NOTIFY 0x00000180 + +#define NV50_3D_DMA_ZETA 0x00000184 + +#define NV50_3D_DMA_QUERY 0x00000188 + +#define NV50_3D_DMA_VTXBUF 0x0000018c + +#define NV50_3D_DMA_LOCAL 0x00000190 + +#define NV50_3D_DMA_STACK 0x00000194 + +#define NV50_3D_DMA_CODE_CB 0x00000198 + +#define NV50_3D_DMA_TSC 0x0000019c + +#define NV50_3D_DMA_TIC 0x000001a0 + +#define NV50_3D_DMA_TEXTURE 0x000001a4 + +#define NV50_3D_DMA_STRMOUT 0x000001a8 + +#define NV50_3D_DMA_CLIPID 0x000001ac + +#define NV50_3D_DMA_COLOR(i0) (0x000001c0 + 0x4*(i0)) +#define NV50_3D_DMA_COLOR__ESIZE 0x00000004 +#define NV50_3D_DMA_COLOR__LEN 0x00000008 + +#define NV50_3D_RT(i0) (0x00000200 + 0x20*(i0)) +#define NV50_3D_RT__ESIZE 0x00000020 +#define NV50_3D_RT__LEN 0x00000008 + +#define NV50_3D_RT_ADDRESS_HIGH(i0) (0x00000200 + 0x20*(i0)) + +#define NV50_3D_RT_ADDRESS_LOW(i0) (0x00000204 + 0x20*(i0)) + +#define NV50_3D_RT_FORMAT(i0) (0x00000208 + 0x20*(i0)) + +#define NV50_3D_RT_TILE_MODE(i0) (0x0000020c + 0x20*(i0)) +#define NV50_3D_RT_TILE_MODE_X__MASK 0x0000000f +#define NV50_3D_RT_TILE_MODE_X__SHIFT 0 +#define NV50_3D_RT_TILE_MODE_Y__MASK 0x000000f0 +#define NV50_3D_RT_TILE_MODE_Y__SHIFT 4 +#define NV50_3D_RT_TILE_MODE_Z__MASK 0x00000f00 +#define NV50_3D_RT_TILE_MODE_Z__SHIFT 8 + +#define NV50_3D_RT_LAYER_STRIDE(i0) (0x00000210 + 0x20*(i0)) +#define NV50_3D_RT_LAYER_STRIDE__SHR 2 + +#define NV50_3D_RT_UNK14(i0) (0x00000214 + 0x20*(i0)) + +#define NV50_3D_VTX_ATTR_1F(i0) (0x00000300 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_1F__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_1F__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_2H(i0) (0x00000340 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_2H__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_2H__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_2H_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_2H_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_2H_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_2H_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_2F_X(i0) 
(0x00000380 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_2F_X__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_2F_X__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_2F_Y(i0) (0x00000384 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_2F_Y__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_2F_Y__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_3F_X(i0) (0x00000400 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_3F_X__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_3F_X__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_3F_Y(i0) (0x00000404 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_3F_Y__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_3F_Y__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_3F_Z(i0) (0x00000408 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_3F_Z__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_3F_Z__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_X(i0) (0x00000500 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_X__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_X__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_Y(i0) (0x00000504 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_Y__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_Y__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_Z(i0) (0x00000508 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_Z__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_Z__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_W(i0) (0x0000050c + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_W__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_W__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4H_0(i0) (0x00000600 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4H_0__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4H_0__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4H_0_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4H_0_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4H_0_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4H_0_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4H_1(i0) (0x00000604 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4H_1__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4H_1__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4H_1_Z__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4H_1_Z__SHIFT 0 +#define 
NV50_3D_VTX_ATTR_4H_1_W__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4H_1_W__SHIFT 16 + +#define NV50_3D_VTX_ATTR_2I(i0) (0x00000680 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_2I__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_2I__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_2I_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_2I_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_2I_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_2I_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_2NI(i0) (0x000006c0 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_2NI__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_2NI__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_2NI_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_2NI_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_2NI_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_2NI_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4I_0(i0) (0x00000700 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4I_0__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4I_0__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4I_0_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4I_0_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4I_0_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4I_0_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4I_1(i0) (0x00000704 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4I_1__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4I_1__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4I_1_Z__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4I_1_Z__SHIFT 0 +#define NV50_3D_VTX_ATTR_4I_1_W__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4I_1_W__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4NI_0(i0) (0x00000780 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4NI_0__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4NI_0__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NI_0_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4NI_0_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4NI_0_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4NI_0_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4NI_1(i0) (0x00000784 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4NI_1__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4NI_1__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NI_1_Z__MASK 0x0000ffff +#define 
NV50_3D_VTX_ATTR_4NI_1_Z__SHIFT 0 +#define NV50_3D_VTX_ATTR_4NI_1_W__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4NI_1_W__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4UB(i0) (0x00000800 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4UB__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4UB__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4UB_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4UB_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4UB_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4UB_Y__SHIFT 8 +#define NV50_3D_VTX_ATTR_4UB_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4UB_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4UB_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4UB_W__SHIFT 24 + +#define NV50_3D_VTX_ATTR_4B(i0) (0x00000840 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4B__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4B__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4B_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4B_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4B_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4B_Y__SHIFT 8 +#define NV50_3D_VTX_ATTR_4B_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4B_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4B_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4B_W__SHIFT 24 + +#define NV50_3D_VTX_ATTR_4NUB(i0) (0x00000880 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4NUB__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4NUB__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NUB_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4NUB_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4NUB_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4NUB_Y__SHIFT 8 +#define NV50_3D_VTX_ATTR_4NUB_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4NUB_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4NUB_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4NUB_W__SHIFT 24 + +#define NV50_3D_VTX_ATTR_4NB(i0) (0x000008c0 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4NB__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4NB__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NB_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4NB_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4NB_Y__MASK 0x0000ff00 +#define 
NV50_3D_VTX_ATTR_4NB_Y__SHIFT 8 +#define NV50_3D_VTX_ATTR_4NB_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4NB_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4NB_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4NB_W__SHIFT 24 + +#define NV50_3D_VERTEX_ARRAY_FETCH(i0) (0x00000900 + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_FETCH__LEN 0x00000010 +#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff +#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0 +#define NV50_3D_VERTEX_ARRAY_FETCH_ENABLE 0x20000000 + +#define NV50_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00000904 + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_START_LOW(i0) (0x00000908 + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_DIVISOR(i0) (0x0000090c + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_SCALE_X__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_SCALE_Y__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_SCALE_Z__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0)) +#define NV50_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010 + +#define 
NV50_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0)) +#define NV50_3D_VIEWPORT_HORIZ__ESIZE 0x00000010 +#define NV50_3D_VIEWPORT_HORIZ__LEN 0x00000010 +#define NV50_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff +#define NV50_3D_VIEWPORT_HORIZ_X__SHIFT 0 +#define NV50_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000 +#define NV50_3D_VIEWPORT_HORIZ_W__SHIFT 16 + +#define NV50_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0)) +#define NV50_3D_VIEWPORT_VERT__ESIZE 0x00000010 +#define NV50_3D_VIEWPORT_VERT__LEN 0x00000010 +#define NV50_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff +#define NV50_3D_VIEWPORT_VERT_Y__SHIFT 0 +#define NV50_3D_VIEWPORT_VERT_H__MASK 0xffff0000 +#define NV50_3D_VIEWPORT_VERT_H__SHIFT 16 + +#define NV50_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0)) +#define NV50_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010 +#define NV50_3D_DEPTH_RANGE_NEAR__LEN 0x00000010 + +#define NV50_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0)) +#define NV50_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 +#define NV50_3D_DEPTH_RANGE_FAR__LEN 0x00000010 + +#define NV50_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NV50_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008 +#define NV50_3D_CLIP_RECT_HORIZ__LEN 0x00000008 +#define NV50_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff +#define NV50_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0 +#define NV50_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000 +#define NV50_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16 + +#define NV50_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NV50_3D_CLIP_RECT_VERT__ESIZE 0x00000008 +#define NV50_3D_CLIP_RECT_VERT__LEN 0x00000008 +#define NV50_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff +#define NV50_3D_CLIP_RECT_VERT_MIN__SHIFT 0 +#define NV50_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000 +#define NV50_3D_CLIP_RECT_VERT_MAX__SHIFT 16 + +#define NV50_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) +#define 
NV50_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 +#define NV50_3D_CLIPID_REGION_HORIZ__LEN 0x00000004 +#define NV50_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff +#define NV50_3D_CLIPID_REGION_HORIZ_X__SHIFT 0 +#define NV50_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000 +#define NV50_3D_CLIPID_REGION_HORIZ_W__SHIFT 16 + +#define NV50_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0)) +#define NV50_3D_CLIPID_REGION_VERT__ESIZE 0x00000008 +#define NV50_3D_CLIPID_REGION_VERT__LEN 0x00000004 +#define NV50_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff +#define NV50_3D_CLIPID_REGION_VERT_Y__SHIFT 0 +#define NV50_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 +#define NV50_3D_CLIPID_REGION_VERT_H__SHIFT 16 + +#define NV50_3D_UNK0D60 0x00000d60 + +#define NV50_3D_UNK0D64 0x00000d64 + +#define NV50_3D_COUNTER_ENABLE 0x00000d68 +#define NV50_3D_COUNTER_ENABLE_VFETCH_VERTICES 0x00000001 +#define NV50_3D_COUNTER_ENABLE_VFETCH_PRIMITIVES 0x00000002 +#define NV50_3D_COUNTER_ENABLE_VP_LAUNCHES 0x00000004 +#define NV50_3D_COUNTER_ENABLE_GP_LAUNCHES 0x00000008 +#define NV50_3D_COUNTER_ENABLE_GP_PRIMITIVES_OUT 0x00000010 +#define NV50_3D_COUNTER_ENABLE_TRANSFORM_FEEDBACK 0x00000020 +#define NV50_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000040 +#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_PRECLIP 0x00000080 +#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_POSTCLIP 0x00000100 +#define NV50_3D_COUNTER_ENABLE_FP_PIXELS 0x00000200 +#define NV84_3D_COUNTER_ENABLE_UNK0A 0x00000400 + +#define NV50_3D_UNK0D6C(i0) (0x00000d6c + 0x4*(i0)) +#define NV50_3D_UNK0D6C__ESIZE 0x00000004 +#define NV50_3D_UNK0D6C__LEN 0x00000002 +#define NV50_3D_UNK0D6C_X__MASK 0x0000ffff +#define NV50_3D_UNK0D6C_X__SHIFT 0 +#define NV50_3D_UNK0D6C_Y__MASK 0xffff0000 +#define NV50_3D_UNK0D6C_Y__SHIFT 16 + +#define NV50_3D_VERTEX_BUFFER_FIRST 0x00000d74 + +#define NV50_3D_VERTEX_BUFFER_COUNT 0x00000d78 + +#define NV50_3D_UNK0D7C 0x00000d7c + +#define NV50_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0)) +#define NV50_3D_CLEAR_COLOR__ESIZE 
0x00000004 +#define NV50_3D_CLEAR_COLOR__LEN 0x00000004 + +#define NV50_3D_CLEAR_DEPTH 0x00000d90 + +#define NV50_3D_STACK_ADDRESS_HIGH 0x00000d94 + +#define NV50_3D_STACK_ADDRESS_LOW 0x00000d98 + +#define NV50_3D_STACK_SIZE_LOG 0x00000d9c + +#define NV50_3D_CLEAR_STENCIL 0x00000da0 + +#define NV50_3D_STRMOUT_PARAMS_LATCH 0x00000da4 + +#define NV50_3D_STRMOUT_PRIMITIVE_LIMIT 0x00000da8 + +#define NV50_3D_POLYGON_MODE_FRONT 0x00000dac +#define NV50_3D_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV50_3D_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV50_3D_POLYGON_MODE_FRONT_FILL 0x00001b02 + +#define NV50_3D_POLYGON_MODE_BACK 0x00000db0 +#define NV50_3D_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV50_3D_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV50_3D_POLYGON_MODE_BACK_FILL 0x00001b02 + +#define NV50_3D_POLYGON_SMOOTH_ENABLE 0x00000db4 + +#define NV50_3D_UNK0DB8 0x00000db8 + +#define NV50_3D_ZCULL_UNK0DBC 0x00000dbc +#define NV50_3D_ZCULL_UNK0DBC_UNK0 0x00000001 +#define NV50_3D_ZCULL_UNK0DBC_UNK16__MASK 0x00030000 +#define NV50_3D_ZCULL_UNK0DBC_UNK16__SHIFT 16 + +#define NV50_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 + +#define NV50_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 + +#define NV50_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 + +#define NV50_3D_UNK0DCC 0x00000dcc + +#define NV50_3D_VTX_ATTR_MASK_UNK0DD0(i0) (0x00000dd0 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__LEN 0x00000002 + +#define NV50_3D_ZCULL_UNK0DD8 0x00000dd8 +#define NV50_3D_ZCULL_UNK0DD8_UNK0__MASK 0x00000007 +#define NV50_3D_ZCULL_UNK0DD8_UNK0__SHIFT 0 +#define NVA3_3D_ZCULL_UNK0DD8_UNK9 0x00000200 +#define NV50_3D_ZCULL_UNK0DD8_UNK16__MASK 0xffff0000 +#define NV50_3D_ZCULL_UNK0DD8_UNK16__SHIFT 16 + +#define NV50_3D_UNK0DDC 0x00000ddc + +#define NV50_3D_UNK0DE0 0x00000de0 + +#define NV50_3D_WATCHDOG_TIMER 0x00000de4 + +#define NV50_3D_UNK0DE8 0x00000de8 + +#define NV50_3D_UNK0DEC 0x00000dec + +#define NV50_3D_UNK0DF0 0x00000df0 
+#define NV50_3D_UNK0DF0_UNK0 0x00000001 +#define NV50_3D_UNK0DF0_UNK1__MASK 0x00000ff0 +#define NV50_3D_UNK0DF0_UNK1__SHIFT 4 + +#define NV50_3D_UNK0DF4 0x00000df4 + +#define NV50_3D_WINDOW_OFFSET_X 0x00000df8 + +#define NV50_3D_WINDOW_OFFSET_Y 0x00000dfc + +#define NV50_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0)) +#define NV50_3D_SCISSOR_ENABLE__ESIZE 0x00000010 +#define NV50_3D_SCISSOR_ENABLE__LEN 0x00000010 + +#define NV50_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0)) +#define NV50_3D_SCISSOR_HORIZ__ESIZE 0x00000010 +#define NV50_3D_SCISSOR_HORIZ__LEN 0x00000010 +#define NV50_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff +#define NV50_3D_SCISSOR_HORIZ_MIN__SHIFT 0 +#define NV50_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000 +#define NV50_3D_SCISSOR_HORIZ_MAX__SHIFT 16 + +#define NV50_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0)) +#define NV50_3D_SCISSOR_VERT__ESIZE 0x00000010 +#define NV50_3D_SCISSOR_VERT__LEN 0x00000010 +#define NV50_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff +#define NV50_3D_SCISSOR_VERT_MIN__SHIFT 0 +#define NV50_3D_SCISSOR_VERT_MAX__MASK 0xffff0000 +#define NV50_3D_SCISSOR_VERT_MAX__SHIFT 16 + +#define NV50_3D_CB_ADDR 0x00000f00 +#define NV50_3D_CB_ADDR_ID__MASK 0x003fff00 +#define NV50_3D_CB_ADDR_ID__SHIFT 8 +#define NV50_3D_CB_ADDR_BUFFER__MASK 0x0000007f +#define NV50_3D_CB_ADDR_BUFFER__SHIFT 0 + +#define NV50_3D_CB_DATA(i0) (0x00000f04 + 0x4*(i0)) +#define NV50_3D_CB_DATA__ESIZE 0x00000004 +#define NV50_3D_CB_DATA__LEN 0x00000010 + +#define NV50_3D_LOCAL_WARPS_LOG_ALLOC 0x00000f44 + +#define NV50_3D_LOCAL_WARPS_NO_CLAMP 0x00000f48 + +#define NV50_3D_STACK_WARPS_LOG_ALLOC 0x00000f4c + +#define NV50_3D_STACK_WARPS_NO_CLAMP 0x00000f50 + +#define NV50_3D_STENCIL_BACK_FUNC_REF 0x00000f54 + +#define NV50_3D_STENCIL_BACK_MASK 0x00000f58 + +#define NV50_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c + +#define NV50_3D_UNK0F60(i0) (0x00000f60 + 0x4*(i0)) +#define NV50_3D_UNK0F60__ESIZE 0x00000004 +#define NV50_3D_UNK0F60__LEN 0x00000004 + +#define NV50_3D_GP_ADDRESS_HIGH 
0x00000f70 + +#define NV50_3D_GP_ADDRESS_LOW 0x00000f74 + +#define NV50_3D_UNK0F78 0x00000f78 + +#define NV50_3D_VP_ADDRESS_HIGH 0x00000f7c + +#define NV50_3D_VP_ADDRESS_LOW 0x00000f80 + +#define NV50_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84 + +#define NV50_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88 + +#define NV50_3D_UNK0F8C 0x00000f8c + +#define NV50_3D_UNK0F90 0x00000f90 + +#define NV50_3D_UNK0F94 0x00000f94 + +#define NV50_3D_UNK0F98 0x00000f98 + +#define NV50_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0)) +#define NV50_3D_DEPTH_BOUNDS__ESIZE 0x00000004 +#define NV50_3D_DEPTH_BOUNDS__LEN 0x00000002 + +#define NV50_3D_FP_ADDRESS_HIGH 0x00000fa4 + +#define NV50_3D_FP_ADDRESS_LOW 0x00000fa8 + +#define NV50_3D_UNK0FAC 0x00000fac +#define NV50_3D_UNK0FAC_UNK0 0x00000001 +#define NVA0_3D_UNK0FAC_UNK2 0x00000002 +#define NV50_3D_UNK0FAC_UNK1__MASK 0x000ffff0 +#define NV50_3D_UNK0FAC_UNK1__SHIFT 4 + +#define NV50_3D_UNK0FB0 0x00000fb0 + +#define NV50_3D_UNK0FB4 0x00000fb4 + +#define NV50_3D_UNK0FB8 0x00000fb8 + +#define NV50_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0)) +#define NV50_3D_MSAA_MASK__ESIZE 0x00000004 +#define NV50_3D_MSAA_MASK__LEN 0x00000004 + +#define NV50_3D_CLIPID_ADDRESS_HIGH 0x00000fcc + +#define NV50_3D_CLIPID_ADDRESS_LOW 0x00000fd0 + +#define NV50_3D_MAP_SEMANTIC_5 0x00000fd4 +#define NV50_3D_MAP_SEMANTIC_5_VIEWPORT_ID__MASK 0x000000ff +#define NV50_3D_MAP_SEMANTIC_5_VIEWPORT_ID__SHIFT 0 + +#define NV50_3D_UNK0FD8 0x00000fd8 +#define NV50_3D_UNK0FD8_UNK0 0x00000001 +#define NV50_3D_UNK0FD8_UNK1 0x00000010 + +#define NV50_3D_UNK0FDC 0x00000fdc + +#define NV50_3D_ZETA_ADDRESS_HIGH 0x00000fe0 + +#define NV50_3D_ZETA_ADDRESS_LOW 0x00000fe4 + +#define NV50_3D_ZETA_FORMAT 0x00000fe8 + +#define NV50_3D_ZETA_TILE_MODE 0x00000fec + +#define NV50_3D_ZETA_LAYER_STRIDE 0x00000ff0 +#define NV50_3D_ZETA_LAYER_STRIDE__SHR 2 + +#define NV50_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4 +#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000 +#define 
NV50_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16 +#define NV50_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff +#define NV50_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0 + +#define NV50_3D_SCREEN_SCISSOR_VERT 0x00000ff8 +#define NV50_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000 +#define NV50_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16 +#define NV50_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff +#define NV50_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 + +#define NV50_3D_UNK0FFC 0x00000ffc + +#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001000 + 0x4*(i0)) +#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004 +#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000010 + +#define NV50_3D_UNK1040(i0) (0x00001040 + 0x4*(i0)) +#define NV50_3D_UNK1040__ESIZE 0x00000004 +#define NV50_3D_UNK1040__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001080 + 0x8*(i0)) +#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008 +#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001084 + 0x8*(i0)) +#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008 +#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000010 + +#define NV50_3D_UNK1100 0x00001100 + +#define NV84_3D_UNK1104 0x00001104 +#define NV84_3D_UNK1104_0__MASK 0x0000ffff +#define NV84_3D_UNK1104_0__SHIFT 0 +#define NV84_3D_UNK1104_0__MAX 0x00002000 +#define NV84_3D_UNK1104_0__ALIGN 0x00000040 +#define NV84_3D_UNK1104_1__MASK 0xffff0000 +#define NV84_3D_UNK1104_1__SHIFT 16 +#define NV84_3D_UNK1104_1__MAX 0x00002000 +#define NV84_3D_UNK1104_1__ALIGN 0x00000040 + +#define NV84_3D_UNK1108 0x00001108 +#define NV84_3D_UNK1108_0 0x00000001 +#define NV84_3D_UNK1108_1 0x00000010 + +#define NV84_3D_UNK110C 0x0000110c + +#define NV84_3D_UNK1110 0x00001110 + +#define NV84_3D_WRCACHE_FLUSH 0x00001114 + +#define NV84_3D_VERTEX_ID_BASE 0x00001118 + +#define NV84_3D_PRIMITIVE_ID 0x0000111c + +#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT(i0) (0x00001120 + 0x4*(i0)) +#define 
NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__ESIZE 0x00000004 +#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__LEN 0x00000004 + +#define NVA3_3D_VP_ATTR_EN_ALT(i0) (0x00001130 + 0x4*(i0)) +#define NVA3_3D_VP_ATTR_EN_ALT__ESIZE 0x00000004 +#define NVA3_3D_VP_ATTR_EN_ALT__LEN 0x00000004 +#define NVA3_3D_VP_ATTR_EN_ALT_7__MASK 0xf0000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7__SHIFT 28 +#define NVA3_3D_VP_ATTR_EN_ALT_7_X 0x10000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7_Y 0x20000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7_Z 0x40000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7_W 0x80000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6__MASK 0x0f000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6__SHIFT 24 +#define NVA3_3D_VP_ATTR_EN_ALT_6_X 0x01000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6_Y 0x02000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6_Z 0x04000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6_W 0x08000000 +#define NVA3_3D_VP_ATTR_EN_ALT_5__MASK 0x00f00000 +#define NVA3_3D_VP_ATTR_EN_ALT_5__SHIFT 20 +#define NVA3_3D_VP_ATTR_EN_ALT_5_X 0x00100000 +#define NVA3_3D_VP_ATTR_EN_ALT_5_Y 0x00200000 +#define NVA3_3D_VP_ATTR_EN_ALT_5_Z 0x00400000 +#define NVA3_3D_VP_ATTR_EN_ALT_5_W 0x00800000 +#define NVA3_3D_VP_ATTR_EN_ALT_4__MASK 0x000f0000 +#define NVA3_3D_VP_ATTR_EN_ALT_4__SHIFT 16 +#define NVA3_3D_VP_ATTR_EN_ALT_4_X 0x00010000 +#define NVA3_3D_VP_ATTR_EN_ALT_4_Y 0x00020000 +#define NVA3_3D_VP_ATTR_EN_ALT_4_Z 0x00040000 +#define NVA3_3D_VP_ATTR_EN_ALT_4_W 0x00080000 +#define NVA3_3D_VP_ATTR_EN_ALT_3__MASK 0x0000f000 +#define NVA3_3D_VP_ATTR_EN_ALT_3__SHIFT 12 +#define NVA3_3D_VP_ATTR_EN_ALT_3_X 0x00001000 +#define NVA3_3D_VP_ATTR_EN_ALT_3_Y 0x00002000 +#define NVA3_3D_VP_ATTR_EN_ALT_3_Z 0x00004000 +#define NVA3_3D_VP_ATTR_EN_ALT_3_W 0x00008000 +#define NVA3_3D_VP_ATTR_EN_ALT_2__MASK 0x00000f00 +#define NVA3_3D_VP_ATTR_EN_ALT_2__SHIFT 8 +#define NVA3_3D_VP_ATTR_EN_ALT_2_X 0x00000100 +#define NVA3_3D_VP_ATTR_EN_ALT_2_Y 0x00000200 +#define NVA3_3D_VP_ATTR_EN_ALT_2_Z 0x00000400 +#define NVA3_3D_VP_ATTR_EN_ALT_2_W 0x00000800 +#define 
NVA3_3D_VP_ATTR_EN_ALT_1__MASK 0x000000f0 +#define NVA3_3D_VP_ATTR_EN_ALT_1__SHIFT 4 +#define NVA3_3D_VP_ATTR_EN_ALT_1_X 0x00000010 +#define NVA3_3D_VP_ATTR_EN_ALT_1_Y 0x00000020 +#define NVA3_3D_VP_ATTR_EN_ALT_1_Z 0x00000040 +#define NVA3_3D_VP_ATTR_EN_ALT_1_W 0x00000080 +#define NVA3_3D_VP_ATTR_EN_ALT_0__MASK 0x0000000f +#define NVA3_3D_VP_ATTR_EN_ALT_0__SHIFT 0 +#define NVA3_3D_VP_ATTR_EN_ALT_0_X 0x00000001 +#define NVA3_3D_VP_ATTR_EN_ALT_0_Y 0x00000002 +#define NVA3_3D_VP_ATTR_EN_ALT_0_Z 0x00000004 +#define NVA3_3D_VP_ATTR_EN_ALT_0_W 0x00000008 + +#define NVA3_3D_UNK1140 0x00001140 + +#define NVA0_3D_UNK1144 0x00001144 + +#define NVA0_3D_VTX_ATTR_DEFINE 0x0000114c +#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff +#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000 + +#define NVA0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0)) +#define NVA0_3D_VTX_ATTR_DATA__ESIZE 0x00000004 +#define NVA0_3D_VTX_ATTR_DATA__LEN 0x00000004 + +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT(i0) (0x00001160 + 0x4*(i0)) +#define 
NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__ESIZE 0x00000004 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__LEN 0x00000020 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__MASK 0x0000001f +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__SHIFT 0 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_CONST 0x00000040 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__MASK 0x001fff80 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__SHIFT 7 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__MASK 0x07e00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__SHIFT 21 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32_32 0x00200000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32 0x00400000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16_16 0x00600000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32 0x00800000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16 0x00a00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8_8 0x01400000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16 0x01e00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32 0x02400000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8 0x02600000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8 0x03000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16 0x03600000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8 0x03a00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_2_10_10_10 0x06000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__MASK 0x38000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__SHIFT 27 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SNORM 0x08000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UNORM 0x10000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SINT 0x18000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UINT 0x20000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_USCALED 0x28000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SSCALED 0x30000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_FLOAT 0x38000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BGRA 
0x80000000 + +#define NV50_3D_RT_CONTROL 0x0000121c +#define NV50_3D_RT_CONTROL_COUNT__MASK 0x0000000f +#define NV50_3D_RT_CONTROL_COUNT__SHIFT 0 +#define NV50_3D_RT_CONTROL_MAP0__MASK 0x00000070 +#define NV50_3D_RT_CONTROL_MAP0__SHIFT 4 +#define NV50_3D_RT_CONTROL_MAP1__MASK 0x00000380 +#define NV50_3D_RT_CONTROL_MAP1__SHIFT 7 +#define NV50_3D_RT_CONTROL_MAP2__MASK 0x00001c00 +#define NV50_3D_RT_CONTROL_MAP2__SHIFT 10 +#define NV50_3D_RT_CONTROL_MAP3__MASK 0x0000e000 +#define NV50_3D_RT_CONTROL_MAP3__SHIFT 13 +#define NV50_3D_RT_CONTROL_MAP4__MASK 0x00070000 +#define NV50_3D_RT_CONTROL_MAP4__SHIFT 16 +#define NV50_3D_RT_CONTROL_MAP5__MASK 0x00380000 +#define NV50_3D_RT_CONTROL_MAP5__SHIFT 19 +#define NV50_3D_RT_CONTROL_MAP6__MASK 0x01c00000 +#define NV50_3D_RT_CONTROL_MAP6__SHIFT 22 +#define NV50_3D_RT_CONTROL_MAP7__MASK 0x0e000000 +#define NV50_3D_RT_CONTROL_MAP7__SHIFT 25 + +#define NV50_3D_UNK1220 0x00001220 + +#define NV50_3D_RT_ARRAY_MODE 0x00001224 +#define NV50_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NV50_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 +#define NV50_3D_RT_ARRAY_MODE_MODE__MASK 0x00010000 +#define NV50_3D_RT_ARRAY_MODE_MODE__SHIFT 16 +#define NV50_3D_RT_ARRAY_MODE_MODE_2D_ARRAY 0x00000000 +#define NV50_3D_RT_ARRAY_MODE_MODE_3D 0x00010000 + +#define NV50_3D_ZETA_HORIZ 0x00001228 + +#define NV50_3D_ZETA_VERT 0x0000122c + +#define NV50_3D_ZETA_ARRAY_MODE 0x00001230 +#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0 +#define NV50_3D_ZETA_ARRAY_MODE_UNK 0x00010000 + +#define NV50_3D_LINKED_TSC 0x00001234 + +#define NV50_3D_UNK1238 0x00001238 + +#define NVA0_3D_DRAW_TFB_BYTES 0x0000123c + +#define NV50_3D_RT_HORIZ(i0) (0x00001240 + 0x8*(i0)) +#define NV50_3D_RT_HORIZ__ESIZE 0x00000008 +#define NV50_3D_RT_HORIZ__LEN 0x00000008 +#define NV50_3D_RT_HORIZ_WIDTH__MASK 0x0fffffff +#define NV50_3D_RT_HORIZ_WIDTH__SHIFT 0 +#define NV50_3D_RT_HORIZ_LINEAR 0x80000000 + +#define NV50_3D_RT_VERT(i0) 
(0x00001244 + 0x8*(i0)) +#define NV50_3D_RT_VERT__ESIZE 0x00000008 +#define NV50_3D_RT_VERT__LEN 0x00000008 + +#define NV50_3D_CB_DEF_ADDRESS_HIGH 0x00001280 + +#define NV50_3D_CB_DEF_ADDRESS_LOW 0x00001284 + +#define NV50_3D_CB_DEF_SET 0x00001288 +#define NV50_3D_CB_DEF_SET_SIZE__MASK 0x0000ffff +#define NV50_3D_CB_DEF_SET_SIZE__SHIFT 0 +#define NV50_3D_CB_DEF_SET_BUFFER__MASK 0x007f0000 +#define NV50_3D_CB_DEF_SET_BUFFER__SHIFT 16 + +#define NV50_3D_UNK128C 0x0000128c +#define NV50_3D_UNK128C_0__MASK 0x00000003 +#define NV50_3D_UNK128C_0__SHIFT 0 +#define NV50_3D_UNK128C_1__MASK 0x00000030 +#define NV50_3D_UNK128C_1__SHIFT 4 +#define NV50_3D_UNK128C_2__MASK 0x00000300 +#define NV50_3D_UNK128C_2__SHIFT 8 +#define NV50_3D_UNK128C_3__MASK 0x00003000 +#define NV50_3D_UNK128C_3__SHIFT 12 + +#define NV50_3D_CALL_LIMIT_LOG 0x00001290 +#define NV50_3D_CALL_LIMIT_LOG_VP__MASK 0x0000000f +#define NV50_3D_CALL_LIMIT_LOG_VP__SHIFT 0 +#define NV50_3D_CALL_LIMIT_LOG_GP__MASK 0x000000f0 +#define NV50_3D_CALL_LIMIT_LOG_GP__SHIFT 4 +#define NV50_3D_CALL_LIMIT_LOG_FP__MASK 0x00000f00 +#define NV50_3D_CALL_LIMIT_LOG_FP__SHIFT 8 + +#define NV50_3D_STRMOUT_BUFFERS_CTRL 0x00001294 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED 0x00000001 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__MASK 0x00000002 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__SHIFT 1 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_PRIMITIVES 0x00000000 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET 0x00000002 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__MASK 0x000000f0 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT 4 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MASK 0x000fff00 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT 8 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX 0x00000800 + +#define NV50_3D_FP_RESULT_COUNT 0x00001298 + +#define NV50_3D_VTX_UNK129C 0x0000129c + +#define NV50_3D_UNK12A0 0x000012a0 + +#define NV50_3D_UNK12A8 0x000012a8 +#define NV50_3D_UNK12A8_UNK1 
0x00000001 +#define NV50_3D_UNK12A8_UNK2__MASK 0x000ffff0 +#define NV50_3D_UNK12A8_UNK2__SHIFT 4 + +#define NV50_3D_UNK12AC 0x000012ac + +#define NV50_3D_UNK12B0 0x000012b0 +#define NV50_3D_UNK12B0_UNK0__MASK 0x000000ff +#define NV50_3D_UNK12B0_UNK0__SHIFT 0 +#define NV50_3D_UNK12B0_UNK1__MASK 0x0000ff00 +#define NV50_3D_UNK12B0_UNK1__SHIFT 8 +#define NV50_3D_UNK12B0_UNK2__MASK 0x00ff0000 +#define NV50_3D_UNK12B0_UNK2__SHIFT 16 +#define NV50_3D_UNK12B0_UNK3__MASK 0xff000000 +#define NV50_3D_UNK12B0_UNK3__SHIFT 24 +#define NV50_3D_UNK12B0_UNK3__MAX 0x00000080 + +#define NV50_3D_UNK12B4 0x000012b4 + +#define NV50_3D_UNK12B8 0x000012b8 + +#define NV50_3D_DEPTH_TEST_ENABLE 0x000012cc + +#define NV50_3D_D3D_FILL_MODE 0x000012d0 +#define NV50_3D_D3D_FILL_MODE_POINT 0x00000001 +#define NV50_3D_D3D_FILL_MODE_WIREFRAME 0x00000002 +#define NV50_3D_D3D_FILL_MODE_SOLID 0x00000003 + +#define NV50_3D_SHADE_MODEL 0x000012d4 +#define NV50_3D_SHADE_MODEL_FLAT 0x00001d00 +#define NV50_3D_SHADE_MODEL_SMOOTH 0x00001d01 + +#define NV50_3D_LOCAL_ADDRESS_HIGH 0x000012d8 + +#define NV50_3D_LOCAL_ADDRESS_LOW 0x000012dc + +#define NV50_3D_LOCAL_SIZE_LOG 0x000012e0 + +#define NV50_3D_BLEND_INDEPENDENT 0x000012e4 + +#define NV50_3D_DEPTH_WRITE_ENABLE 0x000012e8 + +#define NV50_3D_ALPHA_TEST_ENABLE 0x000012ec + +#define NV50_3D_PM_SET(i0) (0x000012f0 + 0x4*(i0)) +#define NV50_3D_PM_SET__ESIZE 0x00000004 +#define NV50_3D_PM_SET__LEN 0x00000004 + +#define NV50_3D_VB_ELEMENT_U8_SETUP 0x00001300 +#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000 +#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30 +#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff +#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0 + +#define NV50_3D_VB_ELEMENT_U8 0x00001304 +#define NV50_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff +#define NV50_3D_VB_ELEMENT_U8_I0__SHIFT 0 +#define NV50_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00 +#define NV50_3D_VB_ELEMENT_U8_I1__SHIFT 8 +#define NV50_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000 
+#define NV50_3D_VB_ELEMENT_U8_I2__SHIFT 16 +#define NV50_3D_VB_ELEMENT_U8_I3__MASK 0xff000000 +#define NV50_3D_VB_ELEMENT_U8_I3__SHIFT 24 + +#define NV50_3D_D3D_CULL_MODE 0x00001308 +#define NV50_3D_D3D_CULL_MODE_NONE 0x00000001 +#define NV50_3D_D3D_CULL_MODE_FRONT 0x00000002 +#define NV50_3D_D3D_CULL_MODE_BACK 0x00000003 + +#define NV50_3D_DEPTH_TEST_FUNC 0x0000130c +#define NV50_3D_DEPTH_TEST_FUNC_NEVER 0x00000200 +#define NV50_3D_DEPTH_TEST_FUNC_LESS 0x00000201 +#define NV50_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202 +#define NV50_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203 +#define NV50_3D_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NV50_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV50_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206 +#define NV50_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207 + +#define NV50_3D_ALPHA_TEST_REF 0x00001310 + +#define NV50_3D_ALPHA_TEST_FUNC 0x00001314 +#define NV50_3D_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NV50_3D_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NV50_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NV50_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define NV50_3D_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV50_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV50_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NV50_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207 + +#define NVA0_3D_DRAW_TFB_STRIDE 0x00001318 +#define NVA0_3D_DRAW_TFB_STRIDE__MIN 0x00000001 +#define NVA0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff + +#define NV50_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0)) +#define NV50_3D_BLEND_COLOR__ESIZE 0x00000004 +#define NV50_3D_BLEND_COLOR__LEN 0x00000004 + +#define NV50_3D_UNK132C 0x0000132c + +#define NV50_3D_TSC_FLUSH 0x00001330 +#define NV50_3D_TSC_FLUSH_SPECIFIC 0x00000001 +#define NV50_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_3D_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_3D_TIC_FLUSH 0x00001334 +#define NV50_3D_TIC_FLUSH_SPECIFIC 0x00000001 +#define NV50_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_3D_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_3D_TEX_CACHE_CTL 
0x00001338 +#define NV50_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define NV50_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NV50_3D_UNK133C 0x0000133c + +#define NV50_3D_BLEND_EQUATION_RGB 0x00001340 +#define NV50_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NV50_3D_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NV50_3D_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NV50_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NV50_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NV50_3D_BLEND_FUNC_SRC_RGB 0x00001344 + +#define NV50_3D_BLEND_FUNC_DST_RGB 0x00001348 + +#define NV50_3D_BLEND_EQUATION_ALPHA 0x0000134c +#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NV50_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NV50_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NV50_3D_BLEND_FUNC_SRC_ALPHA 0x00001350 + +#define NV50_3D_UNK1354 0x00001354 + +#define NV50_3D_BLEND_FUNC_DST_ALPHA 0x00001358 + +#define NV50_3D_UNK135C 0x0000135c + +#define NV50_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) +#define NV50_3D_BLEND_ENABLE__ESIZE 0x00000004 +#define NV50_3D_BLEND_ENABLE__LEN 0x00000008 + +#define NV50_3D_STENCIL_ENABLE 0x00001380 + +#define NV50_3D_STENCIL_FRONT_OP_FAIL 0x00001384 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define 
NV50_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 + +#define NV50_3D_STENCIL_FRONT_FUNC_REF 0x00001394 + +#define NV50_3D_STENCIL_FRONT_MASK 0x00001398 + +#define NV50_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c + +#define NV50_3D_UNK13A0 0x000013a0 + +#define NVA0_3D_DRAW_TFB_BASE 0x000013a4 + +#define NV50_3D_FRAG_COLOR_CLAMP_EN 0x000013a8 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000 +#define 
NV50_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000 + +#define NV50_3D_SCREEN_Y_CONTROL 0x000013ac +#define NV50_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001 +#define NV50_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010 + +#define NV50_3D_LINE_WIDTH 0x000013b0 + +#define NV50_3D_TEX_LIMITS(i0) (0x000013b4 + 0x4*(i0)) +#define NV50_3D_TEX_LIMITS__ESIZE 0x00000004 +#define NV50_3D_TEX_LIMITS__LEN 0x00000003 +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0 +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000 +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007 + +#define NV50_3D_POINT_COORD_REPLACE_MAP(i0) (0x000013c0 + 0x4*(i0)) +#define NV50_3D_POINT_COORD_REPLACE_MAP__ESIZE 0x00000004 +#define NV50_3D_POINT_COORD_REPLACE_MAP__LEN 0x00000010 + +#define NV50_3D_UNK1400_LANES 0x00001400 + +#define NV50_3D_UNK1404 0x00001404 + +#define NV50_3D_UNK1408 0x00001408 + +#define NV50_3D_VP_START_ID 0x0000140c + +#define NV50_3D_GP_START_ID 0x00001410 + +#define NV50_3D_FP_START_ID 0x00001414 + +#define NVA3_3D_UNK1418 0x00001418 + +#define NV50_3D_UNK141C 0x0000141c + +#define NV50_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420 +#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001 +#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400 + +#define NV50_3D_VERTEX_ARRAY_FLUSH 0x0000142c + +#define NV50_3D_UNK1430 0x00001430 +#define NV50_3D_UNK1430_UNK0 0x00000010 +#define NV50_3D_UNK1430_UNK1 0x00000100 + +#define NV50_3D_VB_ELEMENT_BASE 0x00001434 + +#define NV50_3D_VB_INSTANCE_BASE 0x00001438 + +#define NV50_3D_CLEAR_FLAGS 0x0000143c +#define NV50_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001 +#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__MASK 0x00000010 +#define 
NV50_3D_CLEAR_FLAGS_CLEAR_RECT__SHIFT 4 +#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT_SCISSOR 0x00000000 +#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT 0x00000010 + +#define NV50_3D_CODE_CB_FLUSH 0x00001440 + +#define NV50_3D_BIND_TSC(i0) (0x00001444 + 0x8*(i0)) +#define NV50_3D_BIND_TSC__ESIZE 0x00000008 +#define NV50_3D_BIND_TSC__LEN 0x00000003 +#define NV50_3D_BIND_TSC_VALID 0x00000001 +#define NV50_3D_BIND_TSC_SAMPLER__MASK 0x000000f0 +#define NV50_3D_BIND_TSC_SAMPLER__SHIFT 4 +#define NV50_3D_BIND_TSC_TSC__MASK 0x001ff000 +#define NV50_3D_BIND_TSC_TSC__SHIFT 12 + +#define NV50_3D_BIND_TIC(i0) (0x00001448 + 0x8*(i0)) +#define NV50_3D_BIND_TIC__ESIZE 0x00000008 +#define NV50_3D_BIND_TIC__LEN 0x00000003 +#define NV50_3D_BIND_TIC_VALID 0x00000001 +#define NV50_3D_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NV50_3D_BIND_TIC_TEXTURE__SHIFT 1 +#define NV50_3D_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NV50_3D_BIND_TIC_TIC__SHIFT 9 + +#define NV50_3D_BIND_TSC2(i0) (0x00001468 + 0x8*(i0)) +#define NV50_3D_BIND_TSC2__ESIZE 0x00000008 +#define NV50_3D_BIND_TSC2__LEN 0x00000003 +#define NV50_3D_BIND_TSC2_VALID 0x00000001 +#define NV50_3D_BIND_TSC2_SAMPLER__MASK 0x000000f0 +#define NV50_3D_BIND_TSC2_SAMPLER__SHIFT 4 +#define NV50_3D_BIND_TSC2_TSC__MASK 0x001ff000 +#define NV50_3D_BIND_TSC2_TSC__SHIFT 12 + +#define NV50_3D_BIND_TIC2(i0) (0x0000146c + 0x8*(i0)) +#define NV50_3D_BIND_TIC2__ESIZE 0x00000008 +#define NV50_3D_BIND_TIC2__LEN 0x00000003 +#define NV50_3D_BIND_TIC2_VALID 0x00000001 +#define NV50_3D_BIND_TIC2_TEXTURE__MASK 0x000001fe +#define NV50_3D_BIND_TIC2_TEXTURE__SHIFT 1 +#define NV50_3D_BIND_TIC2_TIC__MASK 0x7ffffe00 +#define NV50_3D_BIND_TIC2_TIC__SHIFT 9 + +#define NV50_3D_STRMOUT_MAP(i0) (0x00001480 + 0x4*(i0)) +#define NV50_3D_STRMOUT_MAP__ESIZE 0x00000004 +#define NV50_3D_STRMOUT_MAP__LEN 0x00000020 + +#define NV50_3D_CLIPID_HEIGHT 0x00001504 +#define NV50_3D_CLIPID_HEIGHT__MAX 0x00002000 + +#define NV50_3D_CLIPID_FILL_RECT_HORIZ 0x00001508 +#define 
NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff +#define NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0 +#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000 +#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16 + +#define NV50_3D_CLIPID_FILL_RECT_VERT 0x0000150c +#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff +#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0 +#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000 +#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16 + +#define NV50_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510 +#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001 +#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002 +#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_2 0x00000004 +#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_3 0x00000008 +#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_4 0x00000010 +#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_5 0x00000020 +#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_6 0x00000040 +#define NV50_3D_VP_CLIP_DISTANCE_ENABLE_7 0x00000080 + +#define NV50_3D_SAMPLECNT_ENABLE 0x00001514 + +#define NV50_3D_POINT_SIZE 0x00001518 + +#define NV50_3D_ZCULL_STATCTRS_ENABLE 0x0000151c + +#define NV50_3D_POINT_SPRITE_ENABLE 0x00001520 + +#define NVA0_3D_UNK152C 0x0000152c +#define NVA0_3D_UNK152C_UNK0 0x00000001 +#define NVA0_3D_UNK152C_UNK1 0x00000010 +#define NVA0_3D_UNK152C_UNK2 0x00000100 +#define NVA0_3D_UNK152C_UNK3__MASK 0x000ff000 +#define NVA0_3D_UNK152C_UNK3__SHIFT 12 +#define NVA0_3D_UNK152C_UNK3__MAX 0x00000028 + +#define NV50_3D_COUNTER_RESET 0x00001530 +#define NV50_3D_COUNTER_RESET_SAMPLECNT 0x00000001 +#define NV50_3D_COUNTER_RESET_ZCULL_STATS 0x00000002 +#define NVA0_3D_COUNTER_RESET_STRMOUT_VERTICES 0x00000008 +#define NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK 0x00000010 +#define NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x00000011 +#define NV50_3D_COUNTER_RESET_VFETCH_VERTICES 0x00000012 +#define NV50_3D_COUNTER_RESET_VFETCH_PRIMITIVES 0x00000013 +#define NV50_3D_COUNTER_RESET_VP_LAUNCHES 0x00000015 +#define 
NV50_3D_COUNTER_RESET_GP_LAUNCHES 0x0000001a +#define NV50_3D_COUNTER_RESET_GP_PRIMITIVES_OUT 0x0000001b +#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_PRECLIP 0x0000001c +#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_POSTCLIP 0x0000001d +#define NV50_3D_COUNTER_RESET_FP_PIXELS 0x0000001e + +#define NV50_3D_MULTISAMPLE_ENABLE 0x00001534 + +#define NV50_3D_ZETA_ENABLE 0x00001538 + +#define NV50_3D_MULTISAMPLE_CTRL 0x0000153c +#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001 +#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010 + +#define NV50_3D_NOPERSPECTIVE_BITMAP(i0) (0x00001540 + 0x4*(i0)) +#define NV50_3D_NOPERSPECTIVE_BITMAP__ESIZE 0x00000004 +#define NV50_3D_NOPERSPECTIVE_BITMAP__LEN 0x00000004 + +#define NV50_3D_COND_ADDRESS_HIGH 0x00001550 + +#define NV50_3D_COND_ADDRESS_LOW 0x00001554 + +#define NV50_3D_COND_MODE 0x00001558 +#define NV50_3D_COND_MODE_NEVER 0x00000000 +#define NV50_3D_COND_MODE_ALWAYS 0x00000001 +#define NV50_3D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NV50_3D_COND_MODE_EQUAL 0x00000003 +#define NV50_3D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NV50_3D_TSC_ADDRESS_HIGH 0x0000155c + +#define NV50_3D_TSC_ADDRESS_LOW 0x00001560 +#define NV50_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NV50_3D_TSC_LIMIT 0x00001564 +#define NV50_3D_TSC_LIMIT__MAX 0x00001fff + +#define NV50_3D_UNK1568 0x00001568 + +#define NV50_3D_POLYGON_OFFSET_FACTOR 0x0000156c + +#define NV50_3D_LINE_SMOOTH_ENABLE 0x00001570 + +#define NV50_3D_TIC_ADDRESS_HIGH 0x00001574 + +#define NV50_3D_TIC_ADDRESS_LOW 0x00001578 + +#define NV50_3D_TIC_LIMIT 0x0000157c + +#define NV50_3D_PM_CONTROL(i0) (0x00001580 + 0x4*(i0)) +#define NV50_3D_PM_CONTROL__ESIZE 0x00000004 +#define NV50_3D_PM_CONTROL__LEN 0x00000004 +#define NV50_3D_PM_CONTROL_UNK0 0x00000001 +#define NV50_3D_PM_CONTROL_UNK1__MASK 0x00000070 +#define NV50_3D_PM_CONTROL_UNK1__SHIFT 4 +#define NV50_3D_PM_CONTROL_UNK2__MASK 0x00ffff00 +#define NV50_3D_PM_CONTROL_UNK2__SHIFT 8 +#define 
NV50_3D_PM_CONTROL_UNK3__MASK 0xff000000 +#define NV50_3D_PM_CONTROL_UNK3__SHIFT 24 + +#define NV50_3D_ZCULL_REGION 0x00001590 + +#define NV50_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594 + +#define NV50_3D_STENCIL_BACK_OP_FAIL 0x00001598 +#define NV50_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NV50_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_BACK_OP_ZPASS 0x000015a0 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NV50_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define 
NV50_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 + +#define NV50_3D_UNK15A8 0x000015a8 +#define NV50_3D_UNK15A8_UNK1__MASK 0x00000007 +#define NV50_3D_UNK15A8_UNK1__SHIFT 0 +#define NV50_3D_UNK15A8_UNK2__MASK 0x00000070 +#define NV50_3D_UNK15A8_UNK2__SHIFT 4 + +#define NV50_3D_UNK15AC 0x000015ac + +#define NV50_3D_UNK15B0 0x000015b0 +#define NV50_3D_UNK15B0_0 0x00000001 +#define NV50_3D_UNK15B0_1 0x00000010 +#define NV50_3D_UNK15B0_2 0x00000100 + +#define NV50_3D_CSAA_ENABLE 0x000015b4 + +#define NV50_3D_FRAMEBUFFER_SRGB 0x000015b8 + +#define NV50_3D_POLYGON_OFFSET_UNITS 0x000015bc + +#define NVA3_3D_UNK15C4 0x000015c4 + +#define NVA3_3D_UNK15C8 0x000015c8 + +#define NV50_3D_LAYER 0x000015cc +#define NV50_3D_LAYER_IDX__MASK 0x0000ffff +#define NV50_3D_LAYER_IDX__SHIFT 0 +#define NV50_3D_LAYER_USE_GP 0x00010000 + +#define NV50_3D_MULTISAMPLE_MODE 0x000015d0 +#define NV50_3D_MULTISAMPLE_MODE_MS1 0x00000000 +#define NV50_3D_MULTISAMPLE_MODE_MS2 0x00000001 +#define NV50_3D_MULTISAMPLE_MODE_MS4 0x00000002 +#define NV50_3D_MULTISAMPLE_MODE_MS8 0x00000003 +#define NV50_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004 +#define NV50_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005 +#define NV50_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008 +#define NV50_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009 +#define NV50_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a + +#define NV50_3D_VERTEX_BEGIN_D3D 0x000015d4 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005 
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NV50_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000 +#define NV84_3D_VERTEX_BEGIN_D3D_PRIMITIVE_ID_CONT 0x20000000 +#define NVA0_3D_VERTEX_BEGIN_D3D_INSTANCE_CONT 0x40000000 + +#define NV50_3D_VERTEX_END_D3D 0x000015d8 +#define NV50_3D_VERTEX_END_D3D_UNK0 0x00000001 +#define NVA0_3D_VERTEX_END_D3D_UNK1 0x00000002 + +#define NV50_3D_VERTEX_BEGIN_GL 0x000015dc +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x10000000 +#define NV84_3D_VERTEX_BEGIN_GL_PRIMITIVE_ID_CONT 0x20000000 +#define NVA0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x40000000 + +#define NV50_3D_VERTEX_END_GL 0x000015e0 +#define NV50_3D_VERTEX_END_GL_UNK0 0x00000001 +#define 
NVA0_3D_VERTEX_END_GL_UNK1 0x00000002 + +#define NV50_3D_EDGEFLAG_ENABLE 0x000015e4 + +#define NV50_3D_VB_ELEMENT_U32 0x000015e8 + +#define NV50_3D_VB_ELEMENT_U16_SETUP 0x000015ec +#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000 +#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30 +#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff +#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0 + +#define NV50_3D_VB_ELEMENT_U16 0x000015f0 +#define NV50_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff +#define NV50_3D_VB_ELEMENT_U16_I0__SHIFT 0 +#define NV50_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000 +#define NV50_3D_VB_ELEMENT_U16_I1__SHIFT 16 + +#define NV50_3D_VERTEX_BASE_HIGH 0x000015f4 + +#define NV50_3D_VERTEX_BASE_LOW 0x000015f8 + +#define NV50_3D_VERTEX_DATA 0x00001640 + +#define NV50_3D_PRIM_RESTART_ENABLE 0x00001644 + +#define NV50_3D_PRIM_RESTART_INDEX 0x00001648 + +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001 +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010 +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100 +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000 + +#define NV50_3D_VP_ATTR_EN(i0) (0x00001650 + 0x4*(i0)) +#define NV50_3D_VP_ATTR_EN__ESIZE 0x00000004 +#define NV50_3D_VP_ATTR_EN__LEN 0x00000002 +#define NV50_3D_VP_ATTR_EN_7__MASK 0xf0000000 +#define NV50_3D_VP_ATTR_EN_7__SHIFT 28 +#define NV50_3D_VP_ATTR_EN_7_X 0x10000000 +#define NV50_3D_VP_ATTR_EN_7_Y 0x20000000 +#define NV50_3D_VP_ATTR_EN_7_Z 0x40000000 +#define NV50_3D_VP_ATTR_EN_7_W 0x80000000 +#define NV50_3D_VP_ATTR_EN_6__MASK 0x0f000000 +#define NV50_3D_VP_ATTR_EN_6__SHIFT 24 +#define NV50_3D_VP_ATTR_EN_6_X 0x01000000 +#define NV50_3D_VP_ATTR_EN_6_Y 0x02000000 +#define NV50_3D_VP_ATTR_EN_6_Z 0x04000000 +#define NV50_3D_VP_ATTR_EN_6_W 0x08000000 +#define NV50_3D_VP_ATTR_EN_5__MASK 0x00f00000 +#define NV50_3D_VP_ATTR_EN_5__SHIFT 20 +#define NV50_3D_VP_ATTR_EN_5_X 0x00100000 +#define 
NV50_3D_VP_ATTR_EN_5_Y 0x00200000 +#define NV50_3D_VP_ATTR_EN_5_Z 0x00400000 +#define NV50_3D_VP_ATTR_EN_5_W 0x00800000 +#define NV50_3D_VP_ATTR_EN_4__MASK 0x000f0000 +#define NV50_3D_VP_ATTR_EN_4__SHIFT 16 +#define NV50_3D_VP_ATTR_EN_4_X 0x00010000 +#define NV50_3D_VP_ATTR_EN_4_Y 0x00020000 +#define NV50_3D_VP_ATTR_EN_4_Z 0x00040000 +#define NV50_3D_VP_ATTR_EN_4_W 0x00080000 +#define NV50_3D_VP_ATTR_EN_3__MASK 0x0000f000 +#define NV50_3D_VP_ATTR_EN_3__SHIFT 12 +#define NV50_3D_VP_ATTR_EN_3_X 0x00001000 +#define NV50_3D_VP_ATTR_EN_3_Y 0x00002000 +#define NV50_3D_VP_ATTR_EN_3_Z 0x00004000 +#define NV50_3D_VP_ATTR_EN_3_W 0x00008000 +#define NV50_3D_VP_ATTR_EN_2__MASK 0x00000f00 +#define NV50_3D_VP_ATTR_EN_2__SHIFT 8 +#define NV50_3D_VP_ATTR_EN_2_X 0x00000100 +#define NV50_3D_VP_ATTR_EN_2_Y 0x00000200 +#define NV50_3D_VP_ATTR_EN_2_Z 0x00000400 +#define NV50_3D_VP_ATTR_EN_2_W 0x00000800 +#define NV50_3D_VP_ATTR_EN_1__MASK 0x000000f0 +#define NV50_3D_VP_ATTR_EN_1__SHIFT 4 +#define NV50_3D_VP_ATTR_EN_1_X 0x00000010 +#define NV50_3D_VP_ATTR_EN_1_Y 0x00000020 +#define NV50_3D_VP_ATTR_EN_1_Z 0x00000040 +#define NV50_3D_VP_ATTR_EN_1_W 0x00000080 +#define NV50_3D_VP_ATTR_EN_0__MASK 0x0000000f +#define NV50_3D_VP_ATTR_EN_0__SHIFT 0 +#define NV50_3D_VP_ATTR_EN_0_X 0x00000001 +#define NV50_3D_VP_ATTR_EN_0_Y 0x00000002 +#define NV50_3D_VP_ATTR_EN_0_Z 0x00000004 +#define NV50_3D_VP_ATTR_EN_0_W 0x00000008 + +#define NV50_3D_POINT_SMOOTH_ENABLE 0x00001658 + +#define NV50_3D_POINT_RASTER_RULES 0x0000165c +#define NV50_3D_POINT_RASTER_RULES_OGL 0x00000000 +#define NV50_3D_POINT_RASTER_RULES_D3D 0x00000001 + +#define NV50_3D_POINT_SPRITE_CTRL 0x00001660 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__MASK 0x00000010 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__SHIFT 4 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_LOWER_LEFT 0x00000000 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_UPPER_LEFT 0x00000010 + +#define NVA0_3D_TEX_MISC 0x00001664 +#define NVA0_3D_TEX_MISC_UNK1 
0x00000002 +#define NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 + +#define NV50_3D_LINE_SMOOTH_BLUR 0x00001668 +#define NV50_3D_LINE_SMOOTH_BLUR_LOW 0x00000000 +#define NV50_3D_LINE_SMOOTH_BLUR_MEDIUM 0x00000001 +#define NV50_3D_LINE_SMOOTH_BLUR_HIGH 0x00000002 + +#define NV50_3D_LINE_STIPPLE_ENABLE 0x0000166c + +#define NV50_3D_COVERAGE_LUT(i0) (0x00001670 + 0x4*(i0)) +#define NV50_3D_COVERAGE_LUT__ESIZE 0x00000004 +#define NV50_3D_COVERAGE_LUT__LEN 0x00000004 +#define NV50_3D_COVERAGE_LUT_0__MASK 0x000000ff +#define NV50_3D_COVERAGE_LUT_0__SHIFT 0 +#define NV50_3D_COVERAGE_LUT_1__MASK 0x0000ff00 +#define NV50_3D_COVERAGE_LUT_1__SHIFT 8 +#define NV50_3D_COVERAGE_LUT_2__MASK 0x00ff0000 +#define NV50_3D_COVERAGE_LUT_2__SHIFT 16 +#define NV50_3D_COVERAGE_LUT_3__MASK 0xff000000 +#define NV50_3D_COVERAGE_LUT_3__SHIFT 24 + +#define NV50_3D_LINE_STIPPLE 0x00001680 +#define NV50_3D_LINE_STIPPLE_FACTOR_M1__MASK 0x000000ff +#define NV50_3D_LINE_STIPPLE_FACTOR_M1__SHIFT 0 +#define NV50_3D_LINE_STIPPLE_PATTERN__MASK 0x00ffff00 +#define NV50_3D_LINE_STIPPLE_PATTERN__SHIFT 8 + +#define NV50_3D_PROVOKING_VERTEX_LAST 0x00001684 + +#define NV50_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688 + +#define NV50_3D_POLYGON_STIPPLE_ENABLE 0x0000168c + +#define NV50_3D_UNK1690 0x00001690 +#define NV50_3D_UNK1690_ALWAYS_DERIV 0x00000001 +#define NV50_3D_UNK1690_UNK16 0x00010000 + +#define NV50_3D_SET_PROGRAM_CB 0x00001694 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM__MASK 0x000000f0 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM__SHIFT 4 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX 0x00000000 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY 0x00000020 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT 0x00000030 +#define NV50_3D_SET_PROGRAM_CB_INDEX__MASK 0x00000f00 +#define NV50_3D_SET_PROGRAM_CB_INDEX__SHIFT 8 +#define NV50_3D_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000 +#define NV50_3D_SET_PROGRAM_CB_BUFFER__SHIFT 12 +#define NV50_3D_SET_PROGRAM_CB_VALID 0x00000001 + +#define NV50_3D_UNK1698 0x00001698 +#define 
NV50_3D_UNK1698_0 0x00000001 +#define NV50_3D_UNK1698_1 0x00000010 +#define NV50_3D_UNK1698_2 0x00000100 + +#define NVA3_3D_SAMPLE_SHADING 0x0000169c +#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f +#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0 +#define NVA3_3D_SAMPLE_SHADING_ENABLE 0x00000010 + +#define NVA3_3D_UNK16A0 0x000016a0 + +#define NV50_3D_VP_RESULT_MAP_SIZE 0x000016ac + +#define NV50_3D_VP_REG_ALLOC_TEMP 0x000016b0 + +#define NVA0_3D_UNK16B4 0x000016b4 +#define NVA0_3D_UNK16B4_UNK0 0x00000001 +#define NVA3_3D_UNK16B4_UNK1 0x00000002 + +#define NV50_3D_VP_REG_ALLOC_RESULT 0x000016b8 + +#define NV50_3D_VP_RESULT_MAP(i0) (0x000016bc + 0x4*(i0)) +#define NV50_3D_VP_RESULT_MAP__ESIZE 0x00000004 +#define NV50_3D_VP_RESULT_MAP__LEN 0x00000011 +#define NV50_3D_VP_RESULT_MAP_0__MASK 0x000000ff +#define NV50_3D_VP_RESULT_MAP_0__SHIFT 0 +#define NV50_3D_VP_RESULT_MAP_1__MASK 0x0000ff00 +#define NV50_3D_VP_RESULT_MAP_1__SHIFT 8 +#define NV50_3D_VP_RESULT_MAP_2__MASK 0x00ff0000 +#define NV50_3D_VP_RESULT_MAP_2__SHIFT 16 +#define NV50_3D_VP_RESULT_MAP_3__MASK 0xff000000 +#define NV50_3D_VP_RESULT_MAP_3__SHIFT 24 + +#define NV50_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0)) +#define NV50_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004 +#define NV50_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020 + +#define NVA0_3D_STRMOUT_OFFSET(i0) (0x00001780 + 0x4*(i0)) +#define NVA0_3D_STRMOUT_OFFSET__ESIZE 0x00000004 +#define NVA0_3D_STRMOUT_OFFSET__LEN 0x00000004 + +#define NV50_3D_GP_ENABLE 0x00001798 + +#define NV50_3D_GP_REG_ALLOC_TEMP 0x000017a0 + +#define NV50_3D_GP_REG_ALLOC_RESULT 0x000017a8 + +#define NV50_3D_GP_RESULT_MAP_SIZE 0x000017ac + +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE 0x000017b0 +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS 0x00000001 +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP 0x00000002 +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP 0x00000003 + +#define NV50_3D_RASTERIZE_ENABLE 0x000017b4 + +#define 
NV50_3D_STRMOUT_ENABLE 0x000017b8 + +#define NV50_3D_GP_RESULT_MAP(i0) (0x000017fc + 0x4*(i0)) +#define NV50_3D_GP_RESULT_MAP__ESIZE 0x00000004 +#define NV50_3D_GP_RESULT_MAP__LEN 0x00000021 +#define NV50_3D_GP_RESULT_MAP_0__MASK 0x000000ff +#define NV50_3D_GP_RESULT_MAP_0__SHIFT 0 +#define NV50_3D_GP_RESULT_MAP_1__MASK 0x0000ff00 +#define NV50_3D_GP_RESULT_MAP_1__SHIFT 8 +#define NV50_3D_GP_RESULT_MAP_2__MASK 0x00ff0000 +#define NV50_3D_GP_RESULT_MAP_2__SHIFT 16 +#define NV50_3D_GP_RESULT_MAP_3__MASK 0xff000000 +#define NV50_3D_GP_RESULT_MAP_3__SHIFT 24 + +#define NV50_3D_UNK187C 0x0000187c + +#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT(i0) (0x00001880 + 0x4*(i0)) +#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__ESIZE 0x00000004 +#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__LEN 0x00000020 + +#define NV50_3D_GP_VIEWPORT_ID_ENABLE 0x00001900 + +#define NV50_3D_MAP_SEMANTIC_0 0x00001904 +#define NV50_3D_MAP_SEMANTIC_0_FFC0_ID__MASK 0x000000ff +#define NV50_3D_MAP_SEMANTIC_0_FFC0_ID__SHIFT 0 +#define NV50_3D_MAP_SEMANTIC_0_BFC0_ID__MASK 0x0000ff00 +#define NV50_3D_MAP_SEMANTIC_0_BFC0_ID__SHIFT 8 +#define NV50_3D_MAP_SEMANTIC_0_COLR_NR__MASK 0x00ff0000 +#define NV50_3D_MAP_SEMANTIC_0_COLR_NR__SHIFT 16 +#define NV50_3D_MAP_SEMANTIC_0_CLMP_EN 0xff000000 + +#define NV50_3D_MAP_SEMANTIC_1 0x00001908 +#define NV50_3D_MAP_SEMANTIC_1_CLIP_START__MASK 0x000000ff +#define NV50_3D_MAP_SEMANTIC_1_CLIP_START__SHIFT 0 +#define NV50_3D_MAP_SEMANTIC_1_CLIP_NUM__MASK 0x00000f00 +#define NV50_3D_MAP_SEMANTIC_1_CLIP_NUM__SHIFT 8 + +#define NV50_3D_MAP_SEMANTIC_2 0x0000190c +#define NV50_3D_MAP_SEMANTIC_2_LAYER_ID__MASK 0x000000ff +#define NV50_3D_MAP_SEMANTIC_2_LAYER_ID__SHIFT 0 + +#define NV50_3D_MAP_SEMANTIC_3 0x00001910 +#define NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK 0x00000001 +#define NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__SHIFT 0 +#define NV50_3D_MAP_SEMANTIC_3_PTSZ_ID__MASK 0x00000ff0 +#define NV50_3D_MAP_SEMANTIC_3_PTSZ_ID__SHIFT 4 + +#define NV50_3D_MAP_SEMANTIC_4 0x00001914 +#define 
NV50_3D_MAP_SEMANTIC_4_PRIM_ID__MASK 0x000000ff +#define NV50_3D_MAP_SEMANTIC_4_PRIM_ID__SHIFT 0 + +#define NV50_3D_CULL_FACE_ENABLE 0x00001918 + +#define NV50_3D_FRONT_FACE 0x0000191c +#define NV50_3D_FRONT_FACE_CW 0x00000900 +#define NV50_3D_FRONT_FACE_CCW 0x00000901 + +#define NV50_3D_CULL_FACE 0x00001920 +#define NV50_3D_CULL_FACE_FRONT 0x00000404 +#define NV50_3D_CULL_FACE_BACK 0x00000405 +#define NV50_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 + +#define NV50_3D_UNK1924 0x00001924 + +#define NVA3_3D_FP_MULTISAMPLE 0x00001928 +#define NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK 0x00000001 +#define NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE 0x00000002 + +#define NV50_3D_VIEWPORT_TRANSFORM_EN 0x0000192c + +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 +#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002 +#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000 +#define NV84_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000 + +#define NV50_3D_UNK1940 0x00001940 +#define NV50_3D_UNK1940_0 0x00000001 +#define NV50_3D_UNK1940_1 0x00000010 +#define NV50_3D_UNK1940_2 0x00000100 +#define NV50_3D_UNK1940_3 0x00001000 +#define NV50_3D_UNK1940_4 0x00010000 +#define NV50_3D_UNK1940_5 0x00100000 +#define NV50_3D_UNK1940_6 0x01000000 +#define NV50_3D_UNK1940_7 0x10000000 + +#define NVA3_3D_UNK1944 0x00001944 + +#define NV50_3D_CLIP_RECTS_EN 0x0000194c + +#define NV50_3D_CLIP_RECTS_MODE 0x00001950 
+#define NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000 +#define NV50_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001 +#define NV50_3D_CLIP_RECTS_MODE_NEVER 0x00000002 + +#define NV50_3D_ZCULL_VALIDATE 0x00001954 +#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK0 0x00000001 +#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK1 0x00000010 + +#define NV50_3D_ZCULL_INVALIDATE 0x00001958 + +#define NVA3_3D_UNK1960 0x00001960 +#define NVA3_3D_UNK1960_0 0x00000001 +#define NVA3_3D_UNK1960_1 0x00000010 + +#define NV50_3D_UNK1968 0x00001968 +#define NV50_3D_UNK1968_0 0x00000001 +#define NV50_3D_UNK1968_1 0x00000010 + +#define NV50_3D_FP_CTRL_UNK196C 0x0000196c +#define NV50_3D_FP_CTRL_UNK196C_0 0x00000001 +#define NV50_3D_FP_CTRL_UNK196C_1 0x00000010 + +#define NV50_3D_UNK1978 0x00001978 + +#define NV50_3D_CLIPID_ENABLE 0x0000197c + +#define NV50_3D_CLIPID_WIDTH 0x00001980 +#define NV50_3D_CLIPID_WIDTH__MAX 0x00002000 +#define NV50_3D_CLIPID_WIDTH__ALIGN 0x00000040 + +#define NV50_3D_CLIPID_ID 0x00001984 + +#define NV50_3D_FP_INTERPOLANT_CTRL 0x00001988 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__MASK 0xff000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__SHIFT 24 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_X 0x01000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Y 0x02000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Z 0x04000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_W 0x08000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__MASK 0x00ff0000 +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT 16 +#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__MASK 0x0000ff00 +#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__SHIFT 8 +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__MASK 0x000000ff +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT 0 + +#define NV50_3D_FP_REG_ALLOC_TEMP 0x0000198c + +#define NV50_3D_REG_MODE 0x000019a0 +#define NV50_3D_REG_MODE_PACKED 0x00000001 +#define NV50_3D_REG_MODE_STRIPED 0x00000002 + +#define NV50_3D_FP_CONTROL 0x000019a8 +#define NV50_3D_FP_CONTROL_MULTIPLE_RESULTS 
0x00000001 +#define NV50_3D_FP_CONTROL_EXPORTS_Z 0x00000100 +#define NV50_3D_FP_CONTROL_USES_KIL 0x00100000 + +#define NV50_3D_DEPTH_BOUNDS_EN 0x000019bc + +#define NV50_3D_UNK19C0 0x000019c0 + +#define NV50_3D_LOGIC_OP_ENABLE 0x000019c4 + +#define NV50_3D_LOGIC_OP 0x000019c8 +#define NV50_3D_LOGIC_OP_CLEAR 0x00001500 +#define NV50_3D_LOGIC_OP_AND 0x00001501 +#define NV50_3D_LOGIC_OP_AND_REVERSE 0x00001502 +#define NV50_3D_LOGIC_OP_COPY 0x00001503 +#define NV50_3D_LOGIC_OP_AND_INVERTED 0x00001504 +#define NV50_3D_LOGIC_OP_NOOP 0x00001505 +#define NV50_3D_LOGIC_OP_XOR 0x00001506 +#define NV50_3D_LOGIC_OP_OR 0x00001507 +#define NV50_3D_LOGIC_OP_NOR 0x00001508 +#define NV50_3D_LOGIC_OP_EQUIV 0x00001509 +#define NV50_3D_LOGIC_OP_INVERT 0x0000150a +#define NV50_3D_LOGIC_OP_OR_REVERSE 0x0000150b +#define NV50_3D_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NV50_3D_LOGIC_OP_OR_INVERTED 0x0000150d +#define NV50_3D_LOGIC_OP_NAND 0x0000150e +#define NV50_3D_LOGIC_OP_SET 0x0000150f + +#define NV50_3D_ZETA_COMP_ENABLE 0x000019cc + +#define NV50_3D_CLEAR_BUFFERS 0x000019d0 +#define NV50_3D_CLEAR_BUFFERS_Z 0x00000001 +#define NV50_3D_CLEAR_BUFFERS_S 0x00000002 +#define NV50_3D_CLEAR_BUFFERS_R 0x00000004 +#define NV50_3D_CLEAR_BUFFERS_G 0x00000008 +#define NV50_3D_CLEAR_BUFFERS_B 0x00000010 +#define NV50_3D_CLEAR_BUFFERS_A 0x00000020 +#define NV50_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0 +#define NV50_3D_CLEAR_BUFFERS_RT__SHIFT 6 +#define NV50_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 +#define NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 + +#define NV50_3D_CLIPID_FILL 0x000019d4 + +#define NV50_3D_UNK19D8(i0) (0x000019d8 + 0x4*(i0)) +#define NV50_3D_UNK19D8__ESIZE 0x00000004 +#define NV50_3D_UNK19D8__LEN 0x00000002 + +#define NV50_3D_RT_COMP_ENABLE(i0) (0x000019e0 + 0x4*(i0)) +#define NV50_3D_RT_COMP_ENABLE__ESIZE 0x00000004 +#define NV50_3D_RT_COMP_ENABLE__LEN 0x00000008 + +#define NV50_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) +#define NV50_3D_COLOR_MASK__ESIZE 0x00000004 +#define 
NV50_3D_COLOR_MASK__LEN 0x00000008 +#define NV50_3D_COLOR_MASK_R 0x0000000f +#define NV50_3D_COLOR_MASK_G 0x000000f0 +#define NV50_3D_COLOR_MASK_B 0x00000f00 +#define NV50_3D_COLOR_MASK_A 0x0000f000 + +#define NV50_3D_UNK1A20 0x00001a20 + +#define NV50_3D_DELAY 0x00001a24 + +#define NV50_3D_UNK1A28 0x00001a28 +#define NV50_3D_UNK1A28_0__MASK 0x000000ff +#define NV50_3D_UNK1A28_0__SHIFT 0 +#define NV50_3D_UNK1A28_1 0x00000100 + +#define NV50_3D_UNK1A2C 0x00001a2c + +#define NV50_3D_UNK1A30 0x00001a30 + +#define NV50_3D_UNK1A34 0x00001a34 + +#define NV50_3D_UNK1A38 0x00001a38 + +#define NV50_3D_UNK1A3C 0x00001a3c + +#define NV50_3D_UNK1A40(i0) (0x00001a40 + 0x4*(i0)) +#define NV50_3D_UNK1A40__ESIZE 0x00000004 +#define NV50_3D_UNK1A40__LEN 0x00000010 +#define NV50_3D_UNK1A40_0__MASK 0x00000007 +#define NV50_3D_UNK1A40_0__SHIFT 0 +#define NV50_3D_UNK1A40_1__MASK 0x00000070 +#define NV50_3D_UNK1A40_1__SHIFT 4 +#define NV50_3D_UNK1A40_2__MASK 0x00000700 +#define NV50_3D_UNK1A40_2__SHIFT 8 +#define NV50_3D_UNK1A40_3__MASK 0x00007000 +#define NV50_3D_UNK1A40_3__SHIFT 12 +#define NV50_3D_UNK1A40_4__MASK 0x00070000 +#define NV50_3D_UNK1A40_4__SHIFT 16 +#define NV50_3D_UNK1A40_5__MASK 0x00700000 +#define NV50_3D_UNK1A40_5__SHIFT 20 +#define NV50_3D_UNK1A40_6__MASK 0x07000000 +#define NV50_3D_UNK1A40_6__SHIFT 24 +#define NV50_3D_UNK1A40_7__MASK 0x70000000 +#define NV50_3D_UNK1A40_7__SHIFT 28 + +#define NV50_3D_STRMOUT_ADDRESS_HIGH(i0) (0x00001a80 + 0x10*(i0)) +#define NV50_3D_STRMOUT_ADDRESS_HIGH__ESIZE 0x00000010 +#define NV50_3D_STRMOUT_ADDRESS_HIGH__LEN 0x00000004 + +#define NV50_3D_STRMOUT_ADDRESS_LOW(i0) (0x00001a84 + 0x10*(i0)) +#define NV50_3D_STRMOUT_ADDRESS_LOW__ESIZE 0x00000010 +#define NV50_3D_STRMOUT_ADDRESS_LOW__LEN 0x00000004 + +#define NV50_3D_STRMOUT_NUM_ATTRIBS(i0) (0x00001a88 + 0x10*(i0)) +#define NV50_3D_STRMOUT_NUM_ATTRIBS__ESIZE 0x00000010 +#define NV50_3D_STRMOUT_NUM_ATTRIBS__LEN 0x00000004 +#define NV50_3D_STRMOUT_NUM_ATTRIBS__MAX 0x00000040 + +#define 
NVA0_3D_STRMOUT_OFFSET_LIMIT(i0) (0x00001a8c + 0x10*(i0)) +#define NVA0_3D_STRMOUT_OFFSET_LIMIT__ESIZE 0x00000010 +#define NVA0_3D_STRMOUT_OFFSET_LIMIT__LEN 0x00000004 + +#define NV50_3D_VERTEX_ARRAY_ATTRIB(i0) (0x00001ac0 + 0x4*(i0)) +#define NV50_3D_VERTEX_ARRAY_ATTRIB__ESIZE 0x00000004 +#define NV50_3D_VERTEX_ARRAY_ATTRIB__LEN 0x00000010 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__MASK 0x0000000f +#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__SHIFT 0 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_CONST 0x00000010 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__MASK 0x0007ffe0 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__SHIFT 5 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__MASK 0x01f80000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__SHIFT 19 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 0x00080000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32 0x00100000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16 0x00180000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32 0x00200000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16 0x00280000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8 0x00500000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16 0x00780000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32 0x00900000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8 0x00980000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_2_10_10_10 0x01800000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__MASK 0x7e000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__SHIFT 25 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x7e000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x24000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x12000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x5a000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x6c000000 +#define 
NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x48000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x36000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_BGRA 0x80000000 + +#define NV50_3D_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NV50_3D_QUERY_ADDRESS_LOW 0x00001b04 + +#define NV50_3D_QUERY_SEQUENCE 0x00001b08 + +#define NV50_3D_QUERY_GET 0x00001b0c +#define NV50_3D_QUERY_GET_MODE__MASK 0x00000003 +#define NV50_3D_QUERY_GET_MODE__SHIFT 0 +#define NV50_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000 +#define NV50_3D_QUERY_GET_MODE_SYNC 0x00000001 +#define NV50_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002 +#define NV50_3D_QUERY_GET_UNK4 0x00000010 +#define NVA0_3D_QUERY_GET_INDEX__MASK 0x000000e0 +#define NVA0_3D_QUERY_GET_INDEX__SHIFT 5 +#define NV50_3D_QUERY_GET_UNK8 0x00000100 +#define NV50_3D_QUERY_GET_UNIT__MASK 0x0000f000 +#define NV50_3D_QUERY_GET_UNIT__SHIFT 12 +#define NV50_3D_QUERY_GET_UNIT_UNK00 0x00000000 +#define NV50_3D_QUERY_GET_UNIT_VFETCH 0x00001000 +#define NV50_3D_QUERY_GET_UNIT_VP 0x00002000 +#define NV50_3D_QUERY_GET_UNIT_RAST 0x00004000 +#define NV50_3D_QUERY_GET_UNIT_STRMOUT 0x00005000 +#define NV50_3D_QUERY_GET_UNIT_GP 0x00006000 +#define NV50_3D_QUERY_GET_UNIT_ZCULL 0x00007000 +#define NV50_3D_QUERY_GET_UNIT_TPROP 0x0000a000 +#define NV50_3D_QUERY_GET_UNIT_UNK0C 0x0000c000 +#define NV50_3D_QUERY_GET_UNIT_CROP 0x0000f000 +#define NV50_3D_QUERY_GET_SYNC_COND__MASK 0x00010000 +#define NV50_3D_QUERY_GET_SYNC_COND__SHIFT 16 +#define NV50_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000 +#define NV50_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000 +#define NV50_3D_QUERY_GET_INTR 0x00100000 +#define NV50_3D_QUERY_GET_TYPE__MASK 0x00800000 +#define NV50_3D_QUERY_GET_TYPE__SHIFT 23 +#define NV50_3D_QUERY_GET_TYPE_QUERY 0x00000000 +#define NV50_3D_QUERY_GET_TYPE_COUNTER 0x00800000 +#define NV50_3D_QUERY_GET_QUERY_SELECT__MASK 0x0f000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT__SHIFT 24 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZERO 0x00000000 +#define 
NV50_3D_QUERY_GET_QUERY_SELECT_SAMPLECNT 0x01000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_STRMOUT_NO_OVERFLOW 0x02000000 +#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_DROPPED_PRIMITIVES 0x03000000 +#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_VERTICES 0x04000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK0 0x05000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK1 0x06000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK2 0x07000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK3 0x08000000 +#define NVA0_3D_QUERY_GET_QUERY_SELECT_RT_UNK14 0x0c000000 +#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_OFFSET 0x0d000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT__MASK 0x0f000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT__SHIFT 24 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_VERTICES 0x00000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_PRIMITIVES 0x01000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_VP_LAUNCHES 0x02000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_LAUNCHES 0x03000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_PRIMITIVES_OUT 0x04000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_TRANSFORM_FEEDBACK 0x05000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_GENERATED_PRIMITIVES 0x06000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_PRECLIP 0x07000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_POSTCLIP 0x08000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_FP_PIXELS 0x09000000 +#define NV84_3D_QUERY_GET_COUNTER_SELECT_UNK0A 0x0a000000 +#define NVA0_3D_QUERY_GET_COUNTER_SELECT_UNK0C 0x0c000000 +#define NV50_3D_QUERY_GET_SHORT 0x10000000 + +#define NVA3_3D_VP_RESULT_MAP_ALT(i0) (0x00001b3c + 0x4*(i0)) +#define NVA3_3D_VP_RESULT_MAP_ALT__ESIZE 0x00000004 +#define NVA3_3D_VP_RESULT_MAP_ALT__LEN 0x00000020 +#define NVA3_3D_VP_RESULT_MAP_ALT_0__MASK 0x000000ff +#define NVA3_3D_VP_RESULT_MAP_ALT_0__SHIFT 0 +#define NVA3_3D_VP_RESULT_MAP_ALT_1__MASK 0x0000ff00 +#define 
NVA3_3D_VP_RESULT_MAP_ALT_1__SHIFT 8 +#define NVA3_3D_VP_RESULT_MAP_ALT_2__MASK 0x00ff0000 +#define NVA3_3D_VP_RESULT_MAP_ALT_2__SHIFT 16 +#define NVA3_3D_VP_RESULT_MAP_ALT_3__MASK 0xff000000 +#define NVA3_3D_VP_RESULT_MAP_ALT_3__SHIFT 24 + +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT(i0) (0x00001c00 + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__LEN 0x00000020 +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__MASK 0x00000fff +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__SHIFT 0 +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_ENABLE 0x20000000 + +#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT(i0) (0x00001c04 + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__LEN 0x00000020 + +#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT(i0) (0x00001c08 + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__LEN 0x00000020 + +#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT(i0) (0x00001c0c + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__LEN 0x00000020 + +#define NVA3_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0)) +#define NVA3_3D_IBLEND__ESIZE 0x00000020 +#define NVA3_3D_IBLEND__LEN 0x00000008 + +#define NVA3_3D_IBLEND_UNK00(i0) (0x00001e00 + 0x20*(i0)) + +#define NVA3_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0)) +#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVA3_3D_IBLEND_EQUATION_RGB_MIN 0x00008007 +#define NVA3_3D_IBLEND_EQUATION_RGB_MAX 0x00008008 +#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVA3_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0)) + +#define NVA3_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0)) + +#define NVA3_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0)) +#define 
NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVA3_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVA3_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVA3_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0)) + +#define NVA3_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0)) + +#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT(i0) (0x00001f00 + 0x8*(i0)) +#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__ESIZE 0x00000008 +#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__LEN 0x00000020 + +#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT(i0) (0x00001f04 + 0x8*(i0)) +#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__ESIZE 0x00000008 +#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__LEN 0x00000020 + + +#endif /* NV50_3D_XML */ diff --git a/src/gallium/drivers/nv50/nv50_3ddefs.xml.h b/src/gallium/drivers/nv50/nv50_3ddefs.xml.h new file mode 100644 index 0000000000..f26ac45da4 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_3ddefs.xml.h @@ -0,0 +1,98 @@ +#ifndef NV_3DDEFS_XML +#define NV_3DDEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38) +- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28) +- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons 
to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000 +#define NV50_3D_BLEND_FACTOR_ONE 0x00004001 +#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303 +#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305 +#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308 +#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002 +#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 +#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901 +#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903 + +#endif /* NV_3DDEFS_XML */ diff --git a/src/gallium/drivers/nv50/nv50_buffer.c b/src/gallium/drivers/nv50/nv50_buffer.c deleted file mode 100644 index 45356f9f63..0000000000 --- 
a/src/gallium/drivers/nv50/nv50_buffer.c +++ /dev/null @@ -1,151 +0,0 @@ - -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_math.h" - -#include "nouveau/nouveau_screen.h" -#include "nouveau/nouveau_winsys.h" -#include "nv50_resource.h" - - - -static void nv50_buffer_destroy(struct pipe_screen *pscreen, - struct pipe_resource *presource) -{ - struct nv50_resource *buffer = nv50_resource(presource); - - nouveau_screen_bo_release(pscreen, buffer->bo); - FREE(buffer); -} - - - - -/* Utility functions for transfer create/destroy are hooked in and - * just record the arguments to those functions. - */ -static void * -nv50_buffer_transfer_map( struct pipe_context *pipe, - struct pipe_transfer *transfer ) -{ - struct nv50_resource *buffer = nv50_resource(transfer->resource); - uint8_t *map; - - map = nouveau_screen_bo_map_range( pipe->screen, - buffer->bo, - transfer->box.x, - transfer->box.width, - nouveau_screen_transfer_flags(transfer->usage) ); - if (map == NULL) - return NULL; - - return map + transfer->box.x; -} - - - -static void nv50_buffer_transfer_flush_region( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - struct nv50_resource *buffer = nv50_resource(transfer->resource); - - nouveau_screen_bo_map_flush_range(pipe->screen, - buffer->bo, - transfer->box.x + box->x, - box->width); -} - -static void nv50_buffer_transfer_unmap( struct pipe_context *pipe, - struct pipe_transfer *transfer ) -{ - struct nv50_resource *buffer = nv50_resource(transfer->resource); - - nouveau_screen_bo_unmap(pipe->screen, buffer->bo); -} - - - - -const struct u_resource_vtbl nv50_buffer_vtbl = -{ - u_default_resource_get_handle, /* get_handle */ - nv50_buffer_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ - nv50_buffer_transfer_map, /* transfer_map */ - nv50_buffer_transfer_flush_region, /* 
transfer_flush_region */ - nv50_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ -}; - - - - -struct pipe_resource * -nv50_buffer_create(struct pipe_screen *pscreen, - const struct pipe_resource *template) -{ - struct nv50_resource *buffer; - - buffer = CALLOC_STRUCT(nv50_resource); - if (!buffer) - return NULL; - - buffer->base = *template; - buffer->vtbl = &nv50_buffer_vtbl; - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = pscreen; - - buffer->bo = nouveau_screen_bo_new(pscreen, - 16, - buffer->base.usage, - buffer->base.bind, - buffer->base.width0); - - if (buffer->bo == NULL) - goto fail; - - return &buffer->base; - -fail: - FREE(buffer); - return NULL; -} - - -struct pipe_resource * -nv50_user_buffer_create(struct pipe_screen *pscreen, - void *ptr, - unsigned bytes, - unsigned bind) -{ - struct nv50_resource *buffer; - - buffer = CALLOC_STRUCT(nv50_resource); - if (!buffer) - return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->vtbl = &nv50_buffer_vtbl; - buffer->base.screen = pscreen; - buffer->base.format = PIPE_FORMAT_R8_UNORM; - buffer->base.usage = PIPE_USAGE_IMMUTABLE; - buffer->base.bind = bind; - buffer->base.width0 = bytes; - buffer->base.height0 = 1; - buffer->base.depth0 = 1; - buffer->base.array_size = 1; - - buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes); - if (!buffer->bo) - goto fail; - - return &buffer->base; - -fail: - FREE(buffer); - return NULL; -} - diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c deleted file mode 100644 index ee7cf281f4..0000000000 --- a/src/gallium/drivers/nv50/nv50_clear.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008 Ben Skeggs - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without 
limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "nv50_context.h" - -void -nv50_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil) -{ - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct pipe_framebuffer_state *fb = &nv50->framebuffer; - unsigned mode = 0, i; - const unsigned dirty = nv50->dirty; - - /* don't need NEW_BLEND, NV50TCL_COLOR_MASK doesn't affect CLEAR_BUFFERS */ - nv50->dirty &= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; - if (!nv50_state_validate(nv50, 64)) - return; - - if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { - BEGIN_RING(chan, tesla, NV50TCL_CLEAR_COLOR(0), 4); - OUT_RING (chan, fui(rgba[0])); - OUT_RING (chan, fui(rgba[1])); - OUT_RING (chan, fui(rgba[2])); - OUT_RING (chan, fui(rgba[3])); - mode |= 0x3c; - } - - if (buffers & PIPE_CLEAR_DEPTH) { - BEGIN_RING(chan, tesla, NV50TCL_CLEAR_DEPTH, 1); - OUT_RING (chan, fui(depth)); - mode |= NV50TCL_CLEAR_BUFFERS_Z; - } - if (buffers & 
PIPE_CLEAR_STENCIL) { - BEGIN_RING(chan, tesla, NV50TCL_CLEAR_STENCIL, 1); - OUT_RING (chan, stencil & 0xff); - mode |= NV50TCL_CLEAR_BUFFERS_S; - } - - BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1); - OUT_RING (chan, mode); - - for (i = 1; i < fb->nr_cbufs; i++) { - BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1); - OUT_RING (chan, (i << 6) | 0x3c); - } - nv50->dirty = dirty; -} - diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 0874cb5e4e..930cee7c1e 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Ben Skeggs + * Copyright 2010 Christoph Bumiller * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,76 +27,179 @@ #include "nv50_screen.h" #include "nv50_resource.h" +#include "nouveau/nouveau_reloc.h" + static void -nv50_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence) +nv50_flush(struct pipe_context *pipe, + struct pipe_fence_handle **fence) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_screen *screen = &nv50_context(pipe)->screen->base; + + if (fence) + nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence); - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(chan, nv50->screen->tesla, 0x1338, 1); - OUT_RING (chan, 0x20); - } + /* Try to emit before firing to avoid having to flush again right after + * in case we have to wait on this fence. 
+ */ + nouveau_fence_emit(screen->fence.current); - if (flags & PIPE_FLUSH_FRAME) - FIRE_RING(chan); + FIRE_RING(screen->channel); +} + +void +nv50_default_flush_notify(struct nouveau_channel *chan) +{ + struct nv50_context *nv50 = chan->user_private; + + if (!nv50) + return; + + nouveau_fence_update(&nv50->screen->base, TRUE); + nouveau_fence_next(&nv50->screen->base); } static void -nv50_destroy(struct pipe_context *pipe) +nv50_context_unreference_resources(struct nv50_context *nv50) { - struct nv50_context *nv50 = nv50_context(pipe); - int i; + unsigned s, i; + + for (i = 0; i < NV50_BUFCTX_COUNT; ++i) + nv50_bufctx_reset(nv50, i); - for (i = 0; i < 64; i++) { - if (!nv50->state.hw[i]) - continue; - so_ref(NULL, &nv50->state.hw[i]); - } + for (i = 0; i < nv50->num_vtxbufs; ++i) + pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL); - draw_destroy(nv50->draw); + pipe_resource_reference(&nv50->idxbuf.buffer, NULL); - if (nv50->screen->cur_ctx == nv50) - nv50->screen->cur_ctx = NULL; + for (s = 0; s < 3; ++s) { + for (i = 0; i < nv50->num_textures[s]; ++i) + pipe_sampler_view_reference(&nv50->textures[s][i], NULL); - FREE(nv50); + for (i = 0; i < 16; ++i) + pipe_resource_reference(&nv50->constbuf[s][i], NULL); + } } +static void +nv50_destroy(struct pipe_context *pipe) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50_context_unreference_resources(nv50); + + draw_destroy(nv50->draw); + + if (nv50->screen->cur_ctx == nv50) { + nv50->screen->base.channel->user_private = NULL; + nv50->screen->cur_ctx = NULL; + } + + FREE(nv50); +} struct pipe_context * nv50_create(struct pipe_screen *pscreen, void *priv) { - struct pipe_winsys *pipe_winsys = pscreen->winsys; - struct nv50_screen *screen = nv50_screen(pscreen); - struct nv50_context *nv50; + struct pipe_winsys *pipe_winsys = pscreen->winsys; + struct nv50_screen *screen = nv50_screen(pscreen); + struct nv50_context *nv50; + struct pipe_context *pipe; - nv50 = CALLOC_STRUCT(nv50_context); - if (!nv50) - 
return NULL; - nv50->screen = screen; + nv50 = CALLOC_STRUCT(nv50_context); + if (!nv50) + return NULL; + pipe = &nv50->base.pipe; - nv50->pipe.winsys = pipe_winsys; - nv50->pipe.screen = pscreen; - nv50->pipe.priv = priv; + nv50->screen = screen; + nv50->base.screen = &screen->base; + nv50->base.copy_data = nv50_m2mf_copy_linear; + nv50->base.push_data = nv50_sifc_linear_u8; - nv50->pipe.destroy = nv50_destroy; + pipe->winsys = pipe_winsys; + pipe->screen = pscreen; + pipe->priv = priv; - nv50->pipe.draw_vbo = nv50_draw_vbo; - nv50->pipe.clear = nv50_clear; + pipe->destroy = nv50_destroy; - nv50->pipe.flush = nv50_flush; + pipe->draw_vbo = nv50_draw_vbo; + pipe->clear = nv50_clear; + + pipe->flush = nv50_flush; + + if (!screen->cur_ctx) + screen->cur_ctx = nv50; + screen->base.channel->user_private = nv50; + screen->base.channel->flush_notify = nv50_default_flush_notify; + + nv50_init_query_functions(nv50); + nv50_init_surface_functions(nv50); + nv50_init_state_functions(nv50); + nv50_init_resource_functions(pipe); + + nv50->draw = draw_create(pipe); + assert(nv50->draw); + draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); + + return pipe; +} + +struct resident { + struct nv04_resource *res; + uint32_t flags; +}; + +void +nv50_bufctx_add_resident(struct nv50_context *nv50, int ctx, + struct nv04_resource *resource, uint32_t flags) +{ + struct resident rsd = { resource, flags }; + + if (!resource->bo) + return; + + /* We don't need to reference the resource here, it will be referenced + * in the context/state, and bufctx will be reset when state changes. 
+ */ + util_dynarray_append(&nv50->residents[ctx], struct resident, rsd); +} + +void +nv50_bufctx_del_resident(struct nv50_context *nv50, int ctx, + struct nv04_resource *resource) +{ + struct resident *rsd, *top; + unsigned i; + + for (i = 0; i < nv50->residents[ctx].size / sizeof(struct resident); ++i) { + rsd = util_dynarray_element(&nv50->residents[ctx], struct resident, i); + + if (rsd->res == resource) { + top = util_dynarray_pop_ptr(&nv50->residents[ctx], struct resident); + if (rsd != top) + *rsd = *top; + break; + } + } +} + +void +nv50_bufctx_emit_relocs(struct nv50_context *nv50) +{ + struct resident *rsd; + struct util_dynarray *array; + unsigned ctx, i, n; - screen->base.channel->user_private = nv50; + for (ctx = 0; ctx < NV50_BUFCTX_COUNT; ++ctx) { + array = &nv50->residents[ctx]; - nv50_init_surface_functions(nv50); - nv50_init_state_functions(nv50); - nv50_init_query_functions(nv50); - nv50_init_resource_functions(&nv50->pipe); + n = array->size / sizeof(struct resident); + MARK_RING(nv50->screen->base.channel, n, n); + for (i = 0; i < n; ++i) { + rsd = util_dynarray_element(array, struct resident, i); - nv50->draw = draw_create(&nv50->pipe); - assert(nv50->draw); - draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); + nv50_resource_validate(rsd->res, rsd->flags); + } + } - return &nv50->pipe; + nv50_screen_make_buffers_resident(nv50->screen); } diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index b2b0b72fe2..46e6c2250a 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -5,265 +5,230 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_compiler.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_inlines.h" +#include "util/u_dynarray.h" #include "draw/draw_vertex.h" -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_gldefs.h" -#include 
"nouveau/nouveau_stateobj.h" -#include "nv50_reg.h" - +#include "nv50_winsys.h" +#include "nv50_stateobj.h" #include "nv50_screen.h" #include "nv50_program.h" +#include "nv50_resource.h" -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) \ - fprintf(stderr, "nouveau: "fmt, ##args); - -#define nouveau_bo_tile_layout(nvbo) \ - ((nvbo)->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) - -/* Constant buffer assignment */ -#define NV50_CB_PMISC 0 -#define NV50_CB_PVP 1 -#define NV50_CB_PFP 2 -#define NV50_CB_PGP 3 -#define NV50_CB_AUX 4 - -#define NV50_NEW_BLEND (1 << 0) -#define NV50_NEW_ZSA (1 << 1) -#define NV50_NEW_BLEND_COLOUR (1 << 2) -#define NV50_NEW_STIPPLE (1 << 3) -#define NV50_NEW_SCISSOR (1 << 4) -#define NV50_NEW_VIEWPORT (1 << 5) -#define NV50_NEW_RASTERIZER (1 << 6) -#define NV50_NEW_FRAMEBUFFER (1 << 7) -#define NV50_NEW_VERTPROG (1 << 8) -#define NV50_NEW_VERTPROG_CB (1 << 9) -#define NV50_NEW_FRAGPROG (1 << 10) -#define NV50_NEW_FRAGPROG_CB (1 << 11) -#define NV50_NEW_GEOMPROG (1 << 12) -#define NV50_NEW_GEOMPROG_CB (1 << 13) -#define NV50_NEW_ARRAYS (1 << 14) -#define NV50_NEW_SAMPLER (1 << 15) -#define NV50_NEW_TEXTURE (1 << 16) -#define NV50_NEW_STENCIL_REF (1 << 17) -#define NV50_NEW_CLIP (1 << 18) - -struct nv50_blend_stateobj { - struct pipe_blend_state pipe; - struct nouveau_stateobj *so; -}; - -struct nv50_zsa_stateobj { - struct pipe_depth_stencil_alpha_state pipe; - struct nouveau_stateobj *so; -}; +#include "nouveau/nouveau_context.h" +#include "nouveau/nv_object.xml.h" +#include "nouveau/nv_m2mf.xml.h" +#include "nv50_3ddefs.xml.h" +#include "nv50_3d.xml.h" +#include "nv50_2d.xml.h" -struct nv50_rasterizer_stateobj { - struct pipe_rasterizer_state pipe; - struct nouveau_stateobj *so; -}; +#define NOUVEAU_ERR(fmt, args...) 
\ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); -struct nv50_sampler_stateobj { - boolean normalized; - unsigned tsc[8]; -}; +#ifdef NOUVEAU_DEBUG +# define NOUVEAU_DBG(args...) printf(args); +#else +# define NOUVEAU_DBG(args...) +#endif -struct nv50_sampler_view { - struct pipe_sampler_view pipe; - uint32_t tic[8]; -}; +#define NV50_NEW_BLEND (1 << 0) +#define NV50_NEW_RASTERIZER (1 << 1) +#define NV50_NEW_ZSA (1 << 2) +#define NV50_NEW_VERTPROG (1 << 3) +#define NV50_NEW_GMTYPROG (1 << 6) +#define NV50_NEW_FRAGPROG (1 << 7) +#define NV50_NEW_BLEND_COLOUR (1 << 8) +#define NV50_NEW_STENCIL_REF (1 << 9) +#define NV50_NEW_CLIP (1 << 10) +#define NV50_NEW_SAMPLE_MASK (1 << 11) +#define NV50_NEW_FRAMEBUFFER (1 << 12) +#define NV50_NEW_STIPPLE (1 << 13) +#define NV50_NEW_SCISSOR (1 << 14) +#define NV50_NEW_VIEWPORT (1 << 15) +#define NV50_NEW_ARRAYS (1 << 16) +#define NV50_NEW_VERTEX (1 << 17) +#define NV50_NEW_CONSTBUF (1 << 18) +#define NV50_NEW_TEXTURES (1 << 19) +#define NV50_NEW_SAMPLERS (1 << 20) + +#define NV50_BUFCTX_CONSTANT 0 +#define NV50_BUFCTX_FRAME 1 +#define NV50_BUFCTX_VERTEX 2 +#define NV50_BUFCTX_TEXTURES 3 +#define NV50_BUFCTX_COUNT 4 + +/* fixed constant buffer binding points - low indices for user's constbufs */ +#define NV50_CB_PVP 124 +#define NV50_CB_PGP 126 +#define NV50_CB_PFP 125 +#define NV50_CB_AUX 127 -struct nv50_vtxelt_stateobj { - struct pipe_vertex_element pipe[16]; - unsigned num_elements; - uint32_t hw[16]; +struct nv50_context { + struct nouveau_context base; + + struct nv50_screen *screen; + + struct util_dynarray residents[NV50_BUFCTX_COUNT]; + + uint32_t dirty; + + struct { + uint32_t instance_elts; /* bitmask of per-instance elements */ + uint32_t instance_base; + uint32_t interpolant_ctrl; + int32_t index_bias; + boolean prim_restart; + boolean point_sprite; + uint8_t num_vtxbufs; + uint8_t num_vtxelts; + uint8_t num_textures[3]; + uint8_t num_samplers[3]; + uint16_t scissor; + } state; + + struct 
nv50_blend_stateobj *blend; + struct nv50_rasterizer_stateobj *rast; + struct nv50_zsa_stateobj *zsa; + struct nv50_vertex_stateobj *vertex; + + struct nv50_program *vertprog; + struct nv50_program *gmtyprog; + struct nv50_program *fragprog; + + struct pipe_resource *constbuf[3][16]; + uint16_t constbuf_dirty[3]; + + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned num_vtxbufs; + struct pipe_index_buffer idxbuf; + uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ + uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ + unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */ + unsigned vbo_max_index; + + struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS]; + unsigned num_textures[3]; + struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; + unsigned num_samplers[3]; + + struct pipe_framebuffer_state framebuffer; + struct pipe_blend_color blend_colour; + struct pipe_stencil_ref stencil_ref; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + + unsigned sample_mask; + + boolean vbo_push_hint; + + struct draw_context *draw; }; -static INLINE struct nv50_sampler_view * -nv50_sampler_view(struct pipe_sampler_view *view) -{ - return (struct nv50_sampler_view *)view; -} - -static INLINE unsigned -get_tile_height(uint32_t tile_mode) -{ - return 1 << ((tile_mode & 0xf) + 2); -} - -static INLINE unsigned -get_tile_depth(uint32_t tile_mode) +static INLINE struct nv50_context * +nv50_context(struct pipe_context *pipe) { - return 1 << (tile_mode >> 4); + return (struct nv50_context *)pipe; } - struct nv50_surface { - struct pipe_surface base; - unsigned offset; + struct pipe_surface base; + uint32_t offset; + uint32_t width; + uint16_t height; + uint16_t depth; }; static INLINE struct nv50_surface * -nv50_surface(struct pipe_surface *pt) +nv50_surface(struct pipe_surface *ps) { - return (struct nv50_surface 
*)pt; + return (struct nv50_surface *)ps; } -struct nv50_state { - struct nouveau_stateobj *hw[64]; - uint64_t hw_dirty; - - unsigned sampler_view_nr[3]; - struct nouveau_stateobj *vtxbuf; - struct nouveau_stateobj *vtxattr; - unsigned vtxelt_nr; -}; +/* nv50_context.c */ +struct pipe_context *nv50_create(struct pipe_screen *, void *); -struct nv50_context { - struct pipe_context pipe; - - struct nv50_screen *screen; - - struct draw_context *draw; - - struct nv50_state state; - - unsigned dirty; - struct nv50_blend_stateobj *blend; - struct nv50_zsa_stateobj *zsa; - struct nv50_rasterizer_stateobj *rasterizer; - struct pipe_blend_color blend_colour; - struct pipe_stencil_ref stencil_ref; - struct pipe_poly_stipple stipple; - struct pipe_scissor_state scissor; - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state framebuffer; - struct pipe_clip_state clip; - struct nv50_program *vertprog; - struct nv50_program *fragprog; - struct nv50_program *geomprog; - struct pipe_resource *constbuf[PIPE_SHADER_TYPES]; - struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; - unsigned vtxbuf_nr; - struct pipe_index_buffer idxbuf; - struct nv50_vtxelt_stateobj *vtxelt; - struct nv50_sampler_stateobj *sampler[3][PIPE_MAX_SAMPLERS]; - unsigned sampler_nr[3]; - struct pipe_sampler_view *sampler_views[3][PIPE_MAX_SAMPLERS]; - unsigned sampler_view_nr[3]; - - unsigned vbo_fifo; - unsigned req_lmem; -}; +void nv50_default_flush_notify(struct nouveau_channel *); -static INLINE struct nv50_context * -nv50_context(struct pipe_context *pipe) +void nv50_bufctx_emit_relocs(struct nv50_context *); +void nv50_bufctx_add_resident(struct nv50_context *, int ctx, + struct nv04_resource *, uint32_t flags); +void nv50_bufctx_del_resident(struct nv50_context *, int ctx, + struct nv04_resource *); +static INLINE void +nv50_bufctx_reset(struct nv50_context *nv50, int ctx) { - return (struct nv50_context *)pipe; + util_dynarray_resize(&nv50->residents[ctx], 0); } -extern void 
nv50_init_surface_functions(struct nv50_context *nv50); -extern void nv50_init_state_functions(struct nv50_context *nv50); -extern void nv50_init_query_functions(struct nv50_context *nv50); -extern void nv50_init_transfer_functions(struct nv50_context *nv50); - -extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen); - -extern int -nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, - int dx, int dy, struct pipe_surface *src, int sx, int sy, - int w, int h); - /* nv50_draw.c */ -extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); +extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); -/* nv50_vbo.c */ -extern void nv50_draw_vbo(struct pipe_context *pipe, - const struct pipe_draw_info *info); -extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso); -extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50); +/* nv50_program.c */ +boolean nv50_program_translate(struct nv50_program *); +void nv50_program_destroy(struct nv50_context *, struct nv50_program *); -/* nv50_push.c */ -extern void -nv50_push_elements_instanced(struct pipe_context *, struct pipe_resource *, - unsigned idxsize, int idxbias, - unsigned mode, unsigned start, - unsigned count, unsigned i_start, - unsigned i_count); +/* nv50_query.c */ +void nv50_init_query_functions(struct nv50_context *); -/* nv50_clear.c */ -extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil); +/* nv50_shader_state.c */ +void nv50_vertprog_validate(struct nv50_context *); +void nv50_gmtyprog_validate(struct nv50_context *); +void nv50_fragprog_validate(struct nv50_context *); +void nv50_fp_linkage_validate(struct nv50_context *); +void nv50_gp_linkage_validate(struct nv50_context *); +void nv50_constbufs_validate(struct nv50_context *); +void nv50_sprite_coords_validate(struct nv50_context *); -/* nv50_program.c */ -extern struct 
nouveau_stateobj * -nv50_vertprog_validate(struct nv50_context *nv50); -extern struct nouveau_stateobj * -nv50_fragprog_validate(struct nv50_context *nv50); -extern struct nouveau_stateobj * -nv50_geomprog_validate(struct nv50_context *nv50); -extern struct nouveau_stateobj * -nv50_fp_linkage_validate(struct nv50_context *nv50); -extern struct nouveau_stateobj * -nv50_gp_linkage_validate(struct nv50_context *nv50); -extern void nv50_program_destroy(struct nv50_context *nv50, - struct nv50_program *p); +/* nv50_state.c */ +extern void nv50_init_state_functions(struct nv50_context *); /* nv50_state_validate.c */ -extern boolean nv50_state_validate(struct nv50_context *nv50, unsigned dwords); +extern boolean nv50_state_validate(struct nv50_context *); -extern void nv50_so_init_sifc(struct nv50_context *nv50, - struct nouveau_stateobj *so, - struct nouveau_bo *bo, unsigned reloc, - unsigned offset, unsigned size); +/* nv50_surface.c */ +extern void nv50_clear(struct pipe_context *, unsigned buffers, + const float *rgba, double depth, unsigned stencil); +extern void nv50_init_surface_functions(struct nv50_context *); /* nv50_tex.c */ -extern boolean nv50_tex_construct(struct nv50_sampler_view *view); -extern void nv50_tex_relocs(struct nv50_context *); -extern struct nouveau_stateobj *nv50_tex_validate(struct nv50_context *); +void nv50_validate_textures(struct nv50_context *); +void nv50_validate_samplers(struct nv50_context *); + +struct pipe_sampler_view * +nv50_create_sampler_view(struct pipe_context *, + struct pipe_resource *, + const struct pipe_sampler_view *); + +/* nv50_transfer.c */ +void +nv50_sifc_linear_u8(struct nouveau_context *pipe, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, void *data); +void +nv50_m2mf_copy_linear(struct nouveau_context *pipe, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size); + +/* nv50_vbo.c */ +void 
nv50_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); +void * +nv50_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements); +void +nv50_vertex_state_delete(struct pipe_context *pipe, void *hwcso); -/* nv50_context.c */ -struct pipe_context * -nv50_create(struct pipe_screen *pscreen, void *priv); +void nv50_vertex_arrays_validate(struct nv50_context *nv50); -static INLINE unsigned -nv50_prim(unsigned mode) -{ - switch (mode) { - case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS; - case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES; - case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP; - case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP; - case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES; - case PIPE_PRIM_TRIANGLE_STRIP: - return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP; - case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN; - case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; - case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; - case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; - case PIPE_PRIM_LINES_ADJACENCY: - return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY; - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY; - case PIPE_PRIM_TRIANGLES_ADJACENCY: - return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY; - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY; - default: - break; - } - - NOUVEAU_ERR("invalid primitive type %d\n", mode); - return NV50TCL_VERTEX_BEGIN_POINTS; -} +/* nv50_push.c */ +void nv50_push_vbo(struct nv50_context *, const struct pipe_draw_info *); #endif diff --git a/src/gallium/drivers/nv50/nv50_defs.xml.h b/src/gallium/drivers/nv50/nv50_defs.xml.h new file mode 100644 index 0000000000..1bf2f802b5 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_defs.xml.h @@ -0,0 +1,142 @@ +#ifndef 
NV50_DEFS_XML +#define NV50_DEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut 
<sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT 0x000000c0 +#define NV50_SURFACE_FORMAT_R32G32B32A32_SINT 0x000000c1 +#define NV50_SURFACE_FORMAT_R32G32B32A32_UINT 0x000000c2 +#define NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT 0x000000c3 +#define NV50_SURFACE_FORMAT_R16G16B16A16_UNORM 0x000000c6 +#define NV50_SURFACE_FORMAT_R16G16B16A16_SNORM 0x000000c7 +#define NV50_SURFACE_FORMAT_R16G16B16A16_SINT 0x000000c8 +#define NV50_SURFACE_FORMAT_R16G16B16A16_UINT 0x000000c9 +#define NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT 0x000000ca +#define NV50_SURFACE_FORMAT_R32G32_FLOAT 0x000000cb +#define NV50_SURFACE_FORMAT_R32G32_SINT 0x000000cc +#define NV50_SURFACE_FORMAT_R32G32_UINT 0x000000cd +#define NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT 0x000000ce +#define NV50_SURFACE_FORMAT_A8R8G8B8_UNORM 0x000000cf +#define NV50_SURFACE_FORMAT_A8R8G8B8_SRGB 0x000000d0 +#define NV50_SURFACE_FORMAT_A2B10G10R10_UNORM 0x000000d1 +#define NV50_SURFACE_FORMAT_A2B10G10R10_UINT 0x000000d2 +#define NV50_SURFACE_FORMAT_A8B8G8R8_UNORM 0x000000d5 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SRGB 0x000000d6 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SNORM 0x000000d7 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SINT 0x000000d8 +#define NV50_SURFACE_FORMAT_A8B8G8R8_UINT 0x000000d9 +#define NV50_SURFACE_FORMAT_R16G16_UNORM 0x000000da +#define NV50_SURFACE_FORMAT_R16G16_SNORM 0x000000db +#define NV50_SURFACE_FORMAT_R16G16_SINT 0x000000dc +#define NV50_SURFACE_FORMAT_R16G16_UINT 0x000000dd +#define NV50_SURFACE_FORMAT_R16G16_FLOAT 0x000000de +#define NV50_SURFACE_FORMAT_A2R10G10B10_UNORM 0x000000df +#define NV50_SURFACE_FORMAT_B10G11R11_FLOAT 0x000000e0 +#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5 +#define NV50_SURFACE_FORMAT_X8R8G8B8_UNORM 0x000000e6 +#define NV50_SURFACE_FORMAT_X8R8G8B8_SRGB 0x000000e7 +#define NV50_SURFACE_FORMAT_R5G6B5_UNORM 0x000000e8 +#define NV50_SURFACE_FORMAT_A1R5G5B5_UNORM 0x000000e9 +#define NV50_SURFACE_FORMAT_R8G8_UNORM 0x000000ea +#define NV50_SURFACE_FORMAT_R8G8_SNORM 0x000000eb 
+#define NV50_SURFACE_FORMAT_R8G8_SINT 0x000000ec +#define NV50_SURFACE_FORMAT_R8G8_UINT 0x000000ed +#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee +#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef +#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0 +#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1 +#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2 +#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3 +#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4 +#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5 +#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6 +#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7 +#define NV50_SURFACE_FORMAT_X1R5G5B5_UNORM 0x000000f8 +#define NV50_SURFACE_FORMAT_X8B8G8R8_UNORM 0x000000f9 +#define NV50_SURFACE_FORMAT_X8B8G8R8_SRGB 0x000000fa +#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a +#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013 +#define NV50_ZETA_FORMAT_Z24S8_UNORM 0x00000014 +#define NV50_ZETA_FORMAT_X8Z24_UNORM 0x00000015 +#define NV50_ZETA_FORMAT_S8Z24_UNORM 0x00000016 +#define NV50_ZETA_FORMAT_UNK18 0x00000018 +#define NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019 +#define NV50_ZETA_FORMAT_UNK1D 0x0000001d +#define NV50_ZETA_FORMAT_UNK1E 0x0000001e +#define NV50_ZETA_FORMAT_UNK1F 0x0000001f +#define NV50_QUERY__SIZE 0x00000010 +#define NV50_QUERY_COUNTER 0x00000000 + +#define NV50_QUERY_RES 0x00000004 + +#define NV50_QUERY_TIME 0x00000008 + + +#endif /* NV50_DEFS_XML */ diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c index 2f6f607261..1d8598829c 100644 --- a/src/gallium/drivers/nv50/nv50_draw.c +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -25,32 +25,32 @@ #include "nv50_context.h" struct nv50_render_stage { - struct draw_stage stage; - struct nv50_context *nv50; + struct draw_stage stage; + struct nv50_context *nv50; }; static INLINE struct nv50_render_stage * nv50_render_stage(struct draw_stage *stage) { - return (struct nv50_render_stage *)stage; + return (struct nv50_render_stage *)stage; } static void 
nv50_render_point(struct draw_stage *stage, struct prim_header *prim) { - NOUVEAU_ERR("\n"); + NOUVEAU_ERR("\n"); } static void nv50_render_line(struct draw_stage *stage, struct prim_header *prim) { - NOUVEAU_ERR("\n"); + NOUVEAU_ERR("\n"); } static void nv50_render_tri(struct draw_stage *stage, struct prim_header *prim) { - NOUVEAU_ERR("\n"); + NOUVEAU_ERR("\n"); } static void @@ -61,29 +61,28 @@ nv50_render_flush(struct draw_stage *stage, unsigned flags) static void nv50_render_reset_stipple_counter(struct draw_stage *stage) { - NOUVEAU_ERR("\n"); + NOUVEAU_ERR("\n"); } static void nv50_render_destroy(struct draw_stage *stage) { - FREE(stage); + FREE(stage); } struct draw_stage * nv50_draw_render_stage(struct nv50_context *nv50) { - struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage); + struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage); - rs->nv50 = nv50; - rs->stage.draw = nv50->draw; - rs->stage.destroy = nv50_render_destroy; - rs->stage.point = nv50_render_point; - rs->stage.line = nv50_render_line; - rs->stage.tri = nv50_render_tri; - rs->stage.flush = nv50_render_flush; - rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter; + rs->nv50 = nv50; + rs->stage.draw = nv50->draw; + rs->stage.destroy = nv50_render_destroy; + rs->stage.point = nv50_render_point; + rs->stage.line = nv50_render_line; + rs->stage.tri = nv50_render_tri; + rs->stage.flush = nv50_render_flush; + rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter; - return &rs->stage; + return &rs->stage; } - diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c index 4282809454..7946117cf3 100644 --- a/src/gallium/drivers/nv50/nv50_formats.c +++ b/src/gallium/drivers/nv50/nv50_formats.c @@ -21,26 +21,34 @@ */ #include "nv50_screen.h" -#include "nv50_texture.h" -#include "nv50_reg.h" +#include "nv50_texture.xml.h" +#include "nv50_defs.xml.h" +#include "nv50_3d.xml.h" #include "pipe/p_defines.h" -#define 
A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \ - NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \ - NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \ - NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \ - NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \ - NV50TIC_0_0_FMT_##sz, \ - NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_##sz | \ - NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 | \ - (NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 << 3) | (r << 31) - -#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \ - NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \ - NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \ - NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \ - NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \ - NV50TIC_0_0_FMT_##sz, 0 +#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \ + (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \ + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ + (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \ + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ + (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \ + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ + (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \ + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ + NV50_TIC_0_FMT_##sz, \ + NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_##sz | \ + NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_##t0 | (r << 31) + +#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \ + (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \ + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ + (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \ + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ + (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \ + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ + (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \ + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ + NV50_TIC_0_FMT_##sz, 0 #define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER #define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW @@ -49,98 +57,96 @@ #define SCANOUT PIPE_BIND_SCANOUT /* for vertex buffers: */ 
-#define NV50TIC_0_0_FMT_8_8_8 NV50TIC_0_0_FMT_8_8_8_8 -#define NV50TIC_0_0_FMT_16_16_16 NV50TIC_0_0_FMT_16_16_16_16 -#define NV50TIC_0_0_FMT_32_32_32 NV50TIC_0_0_FMT_32_32_32_32 - -/* NOTE: using NV50_2D_DST_FORMAT for substitute formats used with 2D engine */ +#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8 +#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16 +#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32 const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = { /* COMMON FORMATS */ - [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50TCL_RT_FORMAT_A8R8G8B8_UNORM, + [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM, A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50TCL_RT_FORMAT_X8R8G8B8_UNORM, - A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), + [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM, + A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50TCL_RT_FORMAT_A8R8G8B8_SRGB, + [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB, A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50TCL_RT_FORMAT_X8R8G8B8_SRGB, - A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), + [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB, + A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_B5G6R5_UNORM] = { NV50TCL_RT_FORMAT_R5G6B5_UNORM, - B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1), + [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM, + B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1), SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - 
[PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50TCL_RT_FORMAT_A1R5G5B5_UNORM, + [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM, B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_2D_DST_FORMAT_R16_UNORM, + [PIPE_FORMAT_B4G4R4A4_UNORM] = { 0, B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), SAMPLER_VIEW }, - [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50TCL_RT_FORMAT_A2B10G10R10_UNORM, + [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0), SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT }, - [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50TCL_RT_FORMAT_A2R10G10B10_UNORM, + [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM, A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1), SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER }, /* DEPTH/STENCIL FORMATS */ - [PIPE_FORMAT_Z16_UNORM] = { NV50TCL_ZETA_FORMAT_Z16_UNORM, - B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_DEPTH, 0), + [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM, + B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z16, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM, - B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0), + [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM, + B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, S8Z24, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_Z24X8_UNORM] = { NV50TCL_ZETA_FORMAT_X8Z24_UNORM, - B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0), + [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM, + B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, X8Z24, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM, - B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0), + 
[PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_Z24S8_UNORM, + B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, Z24S8, 0), SAMPLER_VIEW | DEPTH_STENCIL }, - [PIPE_FORMAT_Z32_FLOAT] = { NV50TCL_ZETA_FORMAT_Z32_FLOAT, - B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_DEPTH, 0), + [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, Z32, 0), SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = { - NV50TCL_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM, - B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0), + NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, X24S8Z32, 0), SAMPLER_VIEW | DEPTH_STENCIL }, /* LUMINANCE, ALPHA, INTENSITY */ - [PIPE_FORMAT_L8_UNORM] = { NV50_2D_DST_FORMAT_R8_UNORM, - A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), - SAMPLER_VIEW }, + [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_L8_SRGB] = { NV50_2D_DST_FORMAT_R8_UNORM, - A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), - SAMPLER_VIEW }, + [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_I8_UNORM] = { NV50_2D_DST_FORMAT_R8_UNORM, + [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), - SAMPLER_VIEW }, + SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_A8_UNORM] = { NV50TCL_RT_FORMAT_A8_UNORM, + [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM, A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_L8A8_UNORM] = { NV50_2D_DST_FORMAT_R16_UNORM, + [PIPE_FORMAT_L8A8_UNORM] = { 0, A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), SAMPLER_VIEW }, @@ -151,7 +157,7 @@ const struct nv50_format 
nv50_format_table[PIPE_FORMAT_COUNT] = /* DXT, RGTC */ [PIPE_FORMAT_DXT1_RGB] = { 0, - B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, DXT1, 0), SAMPLER_VIEW }, [PIPE_FORMAT_DXT1_RGBA] = { 0, @@ -167,65 +173,65 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = SAMPLER_VIEW }, [PIPE_FORMAT_RGTC1_UNORM] = { 0, - B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0), + B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC1, 0), SAMPLER_VIEW }, [PIPE_FORMAT_RGTC1_SNORM] = { 0, - B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0), + B_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, RGTC1, 0), SAMPLER_VIEW }, [PIPE_FORMAT_RGTC2_UNORM] = { 0, - B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0), + B_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC2, 0), SAMPLER_VIEW }, [PIPE_FORMAT_RGTC2_SNORM] = { 0, - B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0), + B_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, RGTC2, 0), SAMPLER_VIEW }, /* FLOAT 16 */ - [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16A16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT, A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16X16_FLOAT, - A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0), + [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT, + A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R16G16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16_FLOAT, - A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), + [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT, + A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW 
| RENDER_TARGET }, - [PIPE_FORMAT_R16_FLOAT] = { NV50TCL_RT_FORMAT_R16_FLOAT, - A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), + [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT, + A_(C0, ZERO, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, /* FLOAT 32 */ - [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT, A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32X32_FLOAT, - A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0), + [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT, + A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R32G32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32_FLOAT, - A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0), + [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT, + A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R32_FLOAT] = { NV50TCL_RT_FORMAT_R32_FLOAT, - A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT, + A_(C0, ZERO, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, /* ODD FORMATS */ - [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50TCL_RT_FORMAT_B10G11R11_FLOAT, - B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0), + [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT, + B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0), SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0, - B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 5_9_9_9, 0), + B_(C0, C1, C2, ONE_FLOAT, 
FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0), SAMPLER_VIEW }, /* SNORM 32 */ @@ -235,15 +241,15 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R32G32B32_SNORM] = { 0, - A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0), + A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R32G32_SNORM] = { 0, - A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0), + A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R32_SNORM] = { 0, - A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0), + A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32, 0), VERTEX_BUFFER | SAMPLER_VIEW }, /* UNORM 32 */ @@ -253,202 +259,202 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R32G32B32_UNORM] = { 0, - A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0), + A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R32G32_UNORM] = { 0, - A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0), + A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R32_UNORM] = { 0, - A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0), + A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32, 0), VERTEX_BUFFER | SAMPLER_VIEW }, /* SNORM 16 */ - [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM, A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16G16B16_SNORM] = { 0, - A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0), + A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW }, 
- [PIPE_FORMAT_R16G16_SNORM] = { NV50TCL_RT_FORMAT_R16G16_SNORM, + [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM, A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R16_SNORM] = { NV50TCL_RT_FORMAT_R16_SNORM, - A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0), + [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, + A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, /* UNORM 16 */ - [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_UNORM, + [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16G16B16_UNORM] = { 0, - A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0), + A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW }, - [PIPE_FORMAT_R16G16_UNORM] = { NV50TCL_RT_FORMAT_R16G16_UNORM, + [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R16_UNORM] = { NV50TCL_RT_FORMAT_R16_UNORM, - A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0), + [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, + A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, /* SNORM 8 */ - [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_SNORM, + [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM, A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R8G8B8_SNORM] = { 0, - A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0), + A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0), VERTEX_BUFFER | 
SAMPLER_VIEW }, - [PIPE_FORMAT_R8G8_SNORM] = { NV50TCL_RT_FORMAT_R8G8_SNORM, - A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0), + [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM, + A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8_SNORM] = { NV50TCL_RT_FORMAT_R8_SNORM, - A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0), + [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM, + A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, /* UNORM 8 */ - [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_UNORM, + [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50TCL_RT_FORMAT_A8B8G8R8_SRGB, + [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8G8B8_UNORM] = { NV50TCL_RT_FORMAT_X8B8G8R8_UNORM, - A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), + [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM, + A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8G8B8_SRGB] = { NV50TCL_RT_FORMAT_X8B8G8R8_SRGB, - A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), + [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB, + A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R8G8_UNORM] = { NV50TCL_RT_FORMAT_R8G8_UNORM, - A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0), + [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM, + A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET 
}, - [PIPE_FORMAT_R8_UNORM] = { NV50TCL_RT_FORMAT_R8_UNORM, - A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), + [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - /* SSCALED 32 */ + /* SSCALED 32 (not integer, data is converted to float !) */ [PIPE_FORMAT_R32G32B32A32_SSCALED] = { 0, A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, [PIPE_FORMAT_R32G32B32_SSCALED] = { 0, - A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R32G32_SSCALED] = { 0, - A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R32_SSCALED] = { 0, - A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0), + VERTEX_BUFFER }, /* USCALED 32 */ [PIPE_FORMAT_R32G32B32A32_USCALED] = { 0, A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, [PIPE_FORMAT_R32G32B32_USCALED] = { 0, - A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R32G32_USCALED] = { 0, - A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32_32, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R32_USCALED] = { 0, - A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, 
USCALED, USCALED, 32, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32, 0), + VERTEX_BUFFER }, /* SSCALED 16 */ [PIPE_FORMAT_R16G16B16A16_SSCALED] = { 0, A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, [PIPE_FORMAT_R16G16B16_SSCALED] = { 0, - A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R16G16_SSCALED] = { 0, - A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R16_SSCALED] = { 0, - A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0), + VERTEX_BUFFER }, /* USCALED 16 */ [PIPE_FORMAT_R16G16B16A16_USCALED] = { 0, A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, [PIPE_FORMAT_R16G16B16_USCALED] = { 0, - A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R16G16_USCALED] = { 0, - A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16_16, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R16_USCALED] = { 0, - A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16, 0), + VERTEX_BUFFER }, /* SSCALED 8 */ [PIPE_FORMAT_R8G8B8A8_SSCALED] = { 0, 
A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, [PIPE_FORMAT_R8G8B8_SSCALED] = { 0, - A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R8G8_SSCALED] = { 0, - A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R8_SSCALED] = { 0, - A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0), + VERTEX_BUFFER }, /* USCALED 8 */ [PIPE_FORMAT_R8G8B8A8_USCALED] = { 0, A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER }, [PIPE_FORMAT_R8G8B8_USCALED] = { 0, - A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R8G8_USCALED] = { 0, - A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8_8, 0), + VERTEX_BUFFER }, [PIPE_FORMAT_R8_USCALED] = { 0, - A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8, 0), + VERTEX_BUFFER }, }; diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 309b6503ca..9eeca05ada 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -29,300 +29,284 @@ #include "nv50_resource.h" #include "nv50_transfer.h" -/* The 
restrictions in tile mode selection probably aren't necessary. */ static INLINE uint32_t -get_tile_mode(unsigned ny, unsigned d) +get_tile_dims(unsigned nx, unsigned ny, unsigned nz) { - uint32_t tile_mode = 0x00; - - if (ny > 32) tile_mode = 0x04; /* height 64 tiles */ - else - if (ny > 16) tile_mode = 0x03; /* height 32 tiles */ - else - if (ny > 8) tile_mode = 0x02; /* height 16 tiles */ - else - if (ny > 4) tile_mode = 0x01; /* height 8 tiles */ - - if (d == 1) - return tile_mode; - else - if (tile_mode > 0x02) - tile_mode = 0x02; - - if (d > 16 && tile_mode < 0x02) - return tile_mode | 0x50; /* depth 32 tiles */ - if (d > 8) return tile_mode | 0x40; /* depth 16 tiles */ - if (d > 4) return tile_mode | 0x30; /* depth 8 tiles */ - if (d > 2) return tile_mode | 0x20; /* depth 4 tiles */ - - return tile_mode | 0x10; + uint32_t tile_mode = 0x00; + + if (ny > 32) tile_mode = 0x04; /* height 128 tiles */ + else + if (ny > 16) tile_mode = 0x03; /* height 64 tiles */ + else + if (ny > 8) tile_mode = 0x02; /* height 32 tiles */ + else + if (ny > 4) tile_mode = 0x01; /* height 16 tiles */ + + if (nz == 1) + return tile_mode; + else + if (tile_mode > 0x02) + tile_mode = 0x02; + + if (nz > 16 && tile_mode < 0x02) + return tile_mode | 0x50; /* depth 32 tiles */ + if (nz > 8) return tile_mode | 0x40; /* depth 16 tiles */ + if (nz > 4) return tile_mode | 0x30; /* depth 8 tiles */ + if (nz > 2) return tile_mode | 0x20; /* depth 4 tiles */ + + return tile_mode | 0x10; } static INLINE unsigned -get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned nb_h) +calc_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh) { - unsigned tile_h = get_tile_height(tile_mode); - unsigned tile_d = get_tile_depth(tile_mode); + unsigned tile_h = NV50_TILE_HEIGHT(tile_mode); + unsigned tile_d_shift = NV50_TILE_DIM_SHIFT(tile_mode, 1); + unsigned tile_d = 1 << tile_d_shift; - /* pitch_2d == to next slice within this volume-tile */ - /* pitch_3d == size (in 
bytes) of a volume-tile */ - unsigned pitch_2d = tile_h * 64; - unsigned pitch_3d = tile_d * align(nb_h, tile_h) * pitch; + /* stride_2d == to next slice within this volume tile */ + /* stride_3d == size (in bytes) of a volume tile */ + unsigned stride_2d = tile_h * NV50_TILE_PITCH(tile_mode); + unsigned stride_3d = tile_d * align(nbh, tile_h) * pitch; - return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d; + return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d; } - - - static void -nv50_miptree_destroy(struct pipe_screen *pscreen, - struct pipe_resource *pt) +nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) { - struct nv50_miptree *mt = nv50_miptree(pt); - unsigned l; + struct nv50_miptree *mt = nv50_miptree(pt); - for (l = 0; l <= pt->last_level; ++l) - FREE(mt->level[l].image_offset); + nouveau_screen_bo_release(pscreen, mt->base.bo); - nouveau_screen_bo_release(pscreen, mt->base.bo); - FREE(mt); + FREE(mt); } static boolean nv50_miptree_get_handle(struct pipe_screen *pscreen, - struct pipe_resource *pt, - struct winsys_handle *whandle) + struct pipe_resource *pt, + struct winsys_handle *whandle) { - struct nv50_miptree *mt = nv50_miptree(pt); - unsigned stride; + struct nv50_miptree *mt = nv50_miptree(pt); + unsigned stride; + if (!mt || !mt->base.bo) + return FALSE; - if (!mt || !mt->base.bo) - return FALSE; + stride = util_format_get_stride(mt->base.base.format, + mt->base.base.width0); - stride = util_format_get_stride(mt->base.base.format, - mt->base.base.width0); - - return nouveau_screen_bo_get_handle(pscreen, - mt->base.bo, - stride, - whandle); + return nouveau_screen_bo_get_handle(pscreen, + mt->base.bo, + stride, + whandle); } - const struct u_resource_vtbl nv50_miptree_vtbl = { - nv50_miptree_get_handle, /* get_handle */ - nv50_miptree_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ - nv50_miptree_transfer_new, /* get_transfer */ - nv50_miptree_transfer_del, /* transfer_destroy */ 
+ nv50_miptree_get_handle, /* get_handle */ + nv50_miptree_destroy, /* resource_destroy */ + nv50_miptree_transfer_new, /* get_transfer */ + nv50_miptree_transfer_del, /* transfer_destroy */ nv50_miptree_transfer_map, /* transfer_map */ - u_default_transfer_flush_region, /* transfer_flush_region */ - nv50_miptree_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + u_default_transfer_flush_region, /* transfer_flush_region */ + nv50_miptree_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ }; - - struct pipe_resource * -nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *tmp) +nv50_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) { - struct nouveau_device *dev = nouveau_screen(pscreen)->device; - struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); - struct pipe_resource *pt = &mt->base.base; - unsigned width = tmp->width0, height = tmp->height0; - unsigned depth = tmp->depth0, image_alignment; - uint32_t tile_flags; - int ret, i, l; - - if (!mt) - return NULL; - - *pt = *tmp; - mt->base.vtbl = &nv50_miptree_vtbl; - pipe_reference_init(&pt->reference, 1); - pt->screen = pscreen; - - switch (pt->format) { - case PIPE_FORMAT_Z32_FLOAT: - tile_flags = 0x4800; - break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - tile_flags = 0x1800; - break; - case PIPE_FORMAT_Z16_UNORM: - tile_flags = 0x6c00; - break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - tile_flags = 0x2800; - break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: - tile_flags = 0xe000; - break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - tile_flags = 0x7400; - break; - default: - if ((pt->bind & PIPE_BIND_SCANOUT) && - util_format_get_blocksizebits(pt->format) == 32) - tile_flags = 0x7a00; - else - tile_flags = 0x7000; - break; - } - - /* XXX: texture arrays */ - mt->image_nr = (pt->target == 
PIPE_TEXTURE_CUBE) ? 6 : 1; - - for (l = 0; l <= pt->last_level; l++) { - struct nv50_miptree_level *lvl = &mt->level[l]; - unsigned nblocksy = util_format_get_nblocksy(pt->format, height); - - lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); - lvl->pitch = align(util_format_get_stride(pt->format, width), 64); - lvl->tile_mode = get_tile_mode(nblocksy, depth); - - width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); - } - - image_alignment = get_tile_height(mt->level[0].tile_mode) * 64; - image_alignment *= get_tile_depth(mt->level[0].tile_mode); - - /* NOTE the distinction between arrays of mip-mapped 2D textures and - * mip-mapped 3D textures. We can't use image_nr == depth for 3D mip. - */ - for (i = 0; i < mt->image_nr; i++) { - for (l = 0; l <= pt->last_level; l++) { - struct nv50_miptree_level *lvl = &mt->level[l]; - int size; - unsigned tile_h = get_tile_height(lvl->tile_mode); - unsigned tile_d = get_tile_depth(lvl->tile_mode); - - size = lvl->pitch; - size *= align(util_format_get_nblocksy(pt->format, u_minify(pt->height0, l)), tile_h); - size *= align(u_minify(pt->depth0, l), tile_d); - - lvl->image_offset[i] = mt->total_size; - - mt->total_size += size; - } - mt->total_size = align(mt->total_size, image_alignment); - } - - ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, mt->total_size, - mt->level[0].tile_mode, tile_flags, - &mt->base.bo); - if (ret) { - for (l = 0; l <= pt->last_level; ++l) - FREE(mt->level[l].image_offset); - FREE(mt); - return NULL; - } - - return pt; + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); + struct pipe_resource *pt = &mt->base.base; + int ret; + unsigned w, h, d, l, alloc_size; + uint32_t tile_flags; + + if (!mt) + return NULL; + + mt->base.vtbl = &nv50_miptree_vtbl; + *pt = *templ; + pipe_reference_init(&pt->reference, 1); + pt->screen = pscreen; + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + w = 
pt->width0; + h = pt->height0; + d = mt->layout_3d ? pt->depth0 : 1; + + switch (pt->format) { + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x6c00; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + tile_flags = 0x1800; + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + tile_flags = 0x2800; + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + tile_flags = 0x7400; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + tile_flags = 0x6000; + break; + default: + if ((pt->bind & PIPE_BIND_SCANOUT) && + util_format_get_blocksizebits(pt->format) == 32) + tile_flags = 0x7a00; + else + tile_flags = 0x7000; + break; + } + + /* For 3D textures, a mipmap is spanned by all the layers, for array + * textures and cube maps, each layer contains its own mipmaps. + */ + for (l = 0; l <= pt->last_level; ++l) { + struct nv50_miptree_level *lvl = &mt->level[l]; + unsigned nbx = util_format_get_nblocksx(pt->format, w); + unsigned nby = util_format_get_nblocksy(pt->format, h); + unsigned blocksize = util_format_get_blocksize(pt->format); + + lvl->offset = mt->total_size; + lvl->tile_mode = get_tile_dims(nbx, nby, d); + lvl->pitch = align(nbx * blocksize, NV50_TILE_PITCH(lvl->tile_mode)); + + mt->total_size += lvl->pitch * + align(nby, NV50_TILE_HEIGHT(lvl->tile_mode)) * + align(d, NV50_TILE_DEPTH(lvl->tile_mode)); + + w = u_minify(w, 1); + h = u_minify(h, 1); + d = u_minify(d, 1); + } + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, + NV50_TILE_SIZE(mt->level[0].tile_mode)); + mt->total_size = mt->layer_stride * pt->array_size; + } + + alloc_size = mt->total_size; + + ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size, + mt->level[0].tile_mode, tile_flags, + &mt->base.bo); + if (ret) { + FREE(mt); + return NULL; + } + mt->base.domain = NOUVEAU_BO_VRAM; + + return pt; } - struct pipe_resource * nv50_miptree_from_handle(struct pipe_screen *pscreen, - const struct pipe_resource 
*template, - struct winsys_handle *whandle) + const struct pipe_resource *templ, + struct winsys_handle *whandle) { - struct nv50_miptree *mt; - unsigned stride; - - /* Only supports 2D, non-mipmapped textures for the moment */ - if ((template->target != PIPE_TEXTURE_2D && - template->target != PIPE_TEXTURE_RECT) || - template->last_level != 0 || - template->depth0 != 1) - return NULL; - - mt = CALLOC_STRUCT(nv50_miptree); - if (!mt) - return NULL; - - mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); - if (mt->base.bo == NULL) { - FREE(mt); - return NULL; - } - - - mt->base.base = *template; - mt->base.vtbl = &nv50_miptree_vtbl; - pipe_reference_init(&mt->base.base.reference, 1); - mt->base.base.screen = pscreen; - mt->image_nr = 1; - mt->level[0].pitch = stride; - mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); - mt->level[0].tile_mode = mt->base.bo->tile_mode; - - /* XXX: Need to adjust bo refcount?? - */ - /* nouveau_bo_ref(bo, &mt->base.bo); */ - return &mt->base.base; + struct nv50_miptree *mt; + unsigned stride; + + /* only supports 2D, non-mipmapped textures for the moment */ + if ((templ->target != PIPE_TEXTURE_2D && + templ->target != PIPE_TEXTURE_RECT) || + templ->last_level != 0 || + templ->depth0 != 1 || + templ->array_size > 1) + return NULL; + + mt = CALLOC_STRUCT(nv50_miptree); + if (!mt) + return NULL; + + mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); + if (mt->base.bo == NULL) { + FREE(mt); + return NULL; + } + + mt->base.base = *templ; + mt->base.vtbl = &nv50_miptree_vtbl; + pipe_reference_init(&mt->base.base.reference, 1); + mt->base.base.screen = pscreen; + mt->level[0].pitch = stride; + mt->level[0].offset = 0; + mt->level[0].tile_mode = mt->base.bo->tile_mode; + + /* no need to adjust bo reference count */ + return &mt->base.base; } - -/* Surface functions +/* Surface functions. 
*/ struct pipe_surface * -nv50_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt, - const struct pipe_surface *surf_tmpl) +nv50_miptree_surface_new(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ) { - unsigned level = surf_tmpl->u.tex.level; - struct nv50_miptree *mt = nv50_miptree(pt); - struct nv50_miptree_level *lvl = &mt->level[level]; - struct nv50_surface *ns; - unsigned img = 0, zslice = 0; - - assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); - - /* XXX can't unify these here? */ - if (pt->target == PIPE_TEXTURE_CUBE) - img = surf_tmpl->u.tex.first_layer; - else if (pt->target == PIPE_TEXTURE_3D) - zslice = surf_tmpl->u.tex.first_layer; - - ns = CALLOC_STRUCT(nv50_surface); - if (!ns) - return NULL; - pipe_resource_reference(&ns->base.texture, pt); - ns->base.context = pipe; - ns->base.format = pt->format; - ns->base.width = u_minify(pt->width0, level); - ns->base.height = u_minify(pt->height0, level); - ns->base.usage = surf_tmpl->usage; - pipe_reference_init(&ns->base.reference, 1); - ns->base.u.tex.level = level; - ns->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; - ns->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; - ns->offset = lvl->image_offset[img]; - - if (pt->target == PIPE_TEXTURE_3D) { - unsigned nb_h = util_format_get_nblocksy(pt->format, ns->base.height); - ns->offset += get_zslice_offset(lvl->tile_mode, zslice, - lvl->pitch, nb_h); - } - - return &ns->base; + struct nv50_miptree *mt = nv50_miptree(pt); /* guaranteed */ + struct nv50_surface *ns; + struct pipe_surface *ps; + struct nv50_miptree_level *lvl = &mt->level[templ->u.tex.level]; + + ns = CALLOC_STRUCT(nv50_surface); + if (!ns) + return NULL; + ps = &ns->base; + + pipe_reference_init(&ps->reference, 1); + pipe_resource_reference(&ps->texture, pt); + ps->context = pipe; + ps->format = templ->format; + ps->usage = templ->usage; + ps->u.tex.level = templ->u.tex.level; + ps->u.tex.first_layer 
= templ->u.tex.first_layer; + ps->u.tex.last_layer = templ->u.tex.last_layer; + + ns->width = u_minify(pt->width0, ps->u.tex.level); + ns->height = u_minify(pt->height0, ps->u.tex.level); + ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; + ns->offset = lvl->offset; + + /* comment says there are going to be removed, but they're used by the st */ + ps->width = ns->width; + ps->height = ns->height; + + if (mt->layout_3d) { + unsigned zslice = ps->u.tex.first_layer; + + /* TODO: re-layout the texture to use only depth 1 tiles in this case: */ + if (ns->depth > 1 && (zslice & (NV50_TILE_DEPTH(lvl->tile_mode) - 1))) + NOUVEAU_ERR("Creating unsupported 3D surface of slices [%u:%u].\n", + zslice, ps->u.tex.last_layer); + + ns->offset += calc_zslice_offset(lvl->tile_mode, zslice, lvl->pitch, + util_format_get_nblocksy(pt->format, + ns->height)); + } else { + ns->offset += mt->layer_stride * ps->u.tex.first_layer; + } + + return ps; } void -nv50_miptree_surface_del(struct pipe_context *pipe, - struct pipe_surface *ps) +nv50_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps) { - struct nv50_surface *s = nv50_surface(ps); + struct nv50_surface *s = nv50_surface(ps); + + pipe_resource_reference(&ps->texture, NULL); - pipe_resource_reference(&s->base.texture, NULL); - FREE(s); + FREE(s); } diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index c88e7ba742..82f1b84652 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -307,7 +307,10 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv) bb[p++] = b->out[j]; break; case CFG_EDGE_LOOP_LEAVE: - bbb[pp++] = b->out[j]; + if (!b->out[j]->priv) { + bbb[pp++] = b->out[j]; + b->out[j]->priv = 1; + } break; default: assert(0); diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h index 2ead80430b..e6f3815baf 100644 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ 
b/src/gallium/drivers/nv50/nv50_pc.h @@ -23,6 +23,8 @@ #ifndef __NV50_COMPILER_H__ #define __NV50_COMPILER_H__ +#define NV50PC_DEBUG + #ifdef NV50PC_DEBUG # define NV50_DBGMSG(args...) debug_printf(args) #else diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c index f37dc51e6a..252c58dd8f 100644 --- a/src/gallium/drivers/nv50/nv50_pc_emit.c +++ b/src/gallium/drivers/nv50/nv50_pc_emit.c @@ -762,7 +762,8 @@ emit_flow(struct nv_pc *pc, struct nv_instruction *i, ubyte flow_op) new_fixup(pc, NV50_FIXUP_CODE_RELOC, 0, pos, 0xffff << 11, 9); new_fixup(pc, NV50_FIXUP_CODE_RELOC, 1, pos, 0x3f << 14, -4); - pc->emit[0] |= (pos / 4) << 11; + pc->emit[0] |= ((pos >> 2) & 0xffff) << 11; + pc->emit[1] |= ((pos >> 18) & 0x003f) << 14; } } diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 27eb3817bf..281ccf7ac6 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -145,8 +145,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) int j; uint size, n32 = 0; + /* find first non-empty block emitted before b */ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j); - if (j >= 0) { + for (; j >= 0; --j) { in = pc->bb_list[j]; /* check for no-op branches (BRA $PC+8) */ @@ -160,6 +161,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) nv_nvi_delete(in->exit); } b->bin_pos = in->bin_pos + in->bin_size; + + if (in->bin_size) /* no more no-op branches to b */ + break; } pc->bb_list[pc->num_blocks++] = b; @@ -299,7 +303,7 @@ check_swap_src_0_1(struct nv_instruction *nvi) } if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0) - nvi->set_cond = cc_swapped[nvi->set_cond]; + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; } static int diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index db68176491..a63f9d8a6d 100644 --- 
a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -328,10 +328,15 @@ prog_decl(struct nv50_translation_info *ti, } break; case TGSI_FILE_SYSTEM_VALUE: + /* For VP/GP inputs, they are put in s[] after the last normal input. + * Let sysval_map reflect the order of the sysvals in s[] and fixup later. + */ switch (decl->Semantic.Name) { case TGSI_SEMANTIC_FACE: break; case TGSI_SEMANTIC_INSTANCEID: + ti->p->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; + ti->sysval_map[first] = 2; break; case TGSI_SEMANTIC_PRIMID: break; @@ -392,6 +397,18 @@ nv50_vertprog_prepare(struct nv50_translation_info *ti) } } + for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) { + switch (ti->sysval_map[i]) { + case 2: + if (!(ti->p->vp.attrs[2] & NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID)) + ti->sysval_map[i] = 1; + ti->sysval_map[i] = (ti->sysval_map[i] - 1) + num_inputs; + break; + default: + break; + } + } + if (p->vp.psiz < 0x40) p->vp.psiz = p->out[p->vp.psiz].hw; @@ -411,11 +428,11 @@ nv50_fragprog_prepare(struct nv50_translation_info *ti) if (ti->scan.writes_z) { p->fp.flags[1] = 0x11; - p->fp.flags[0] |= NV50TCL_FP_CONTROL_EXPORTS_Z; + p->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; } if (ti->scan.uses_kill) - p->fp.flags[0] |= NV50TCL_FP_CONTROL_USES_KIL; + p->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; /* FP inputs */ @@ -490,13 +507,13 @@ nv50_fragprog_prepare(struct nv50_translation_info *ti) if (n < m) nvary -= p->in[n].hw; - p->fp.interp |= nvary << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT; - p->fp.interp |= nintp << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT; + p->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT; + p->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT; /* FP outputs */ if (p->out_nr > (1 + (ti->scan.writes_z ? 
1 : 0))) - p->fp.flags[0] |= NV50TCL_FP_CONTROL_MULTIPLE_RESULTS; + p->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS; depr = p->out_nr; for (i = 0; i < p->out_nr; ++i) { @@ -608,7 +625,7 @@ nv50_prog_scan(struct nv50_translation_info *ti) } boolean -nv50_program_tx(struct nv50_program *p) +nv50_program_translate(struct nv50_program *p) { struct nv50_translation_info *ti; int ret; @@ -646,9 +663,8 @@ out: void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) { - nouveau_bo_ref(NULL, &p->bo); - - so_ref(NULL, &p->so); + if (p->res) + nouveau_resource_free(&p->res); if (p->code) FREE(p->code); diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 33c4c8ca6d..993e1691ab 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -47,12 +47,9 @@ struct nv50_program { boolean translated; boolean uses_lmem; - struct nouveau_bo *bo; - struct nouveau_stateobj *so; - uint32_t *code; unsigned code_size; - unsigned code_start; /* offset inside bo */ + unsigned code_base; uint32_t *immd; unsigned immd_size; unsigned parm_size; /* size limit of uniform buffer */ @@ -89,6 +86,8 @@ struct nv50_program { /* relocation records */ void *fixups; unsigned num_fixups; + + struct nouveau_resource *res; }; #define NV50_INTERP_LINEAR (1 << 0) @@ -112,6 +111,7 @@ struct nv50_translation_info { ubyte output_file; ubyte input_map[PIPE_MAX_SHADER_INPUTS][4]; ubyte output_map[PIPE_MAX_SHADER_OUTPUTS][4]; + ubyte sysval_map[TGSI_SEMANTIC_COUNT]; ubyte interp_mode[PIPE_MAX_SHADER_INPUTS]; int input_access[PIPE_MAX_SHADER_INPUTS][4]; int output_access[PIPE_MAX_SHADER_OUTPUTS][4]; diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c index 380f69406a..e8ad1ddd38 100644 --- a/src/gallium/drivers/nv50/nv50_push.c +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -1,362 +1,297 @@ + #include "pipe/p_context.h" #include "pipe/p_state.h" #include 
"util/u_inlines.h" #include "util/u_format.h" -#include "util/u_split_prim.h" +#include "translate/translate.h" #include "nv50_context.h" #include "nv50_resource.h" -struct push_context { - struct nv50_context *nv50; +#include "nv50_3d.xml.h" - unsigned vtx_size; +struct push_context { + struct nouveau_channel *chan; void *idxbuf; - int32_t idxbias; - unsigned idxsize; float edgeflag; int edgeflag_attr; - struct { - void *map; - unsigned stride; - unsigned divisor; - unsigned step; - void (*push)(struct nouveau_channel *, void *); - } attr[16]; - unsigned attr_nr; + uint32_t vertex_words; + uint32_t packet_vertex_limit; + + struct translate *translate; + + boolean primitive_restart; + uint32_t prim; + uint32_t restart_index; + uint32_t instance_id; }; -static void -emit_b32_1(struct nouveau_channel *chan, void *data) +static INLINE unsigned +prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) { - uint32_t *v = data; - - OUT_RING(chan, v[0]); + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; } -static void -emit_b32_2(struct nouveau_channel *chan, void *data) +static INLINE unsigned +prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index) { - uint32_t *v = data; - - OUT_RING(chan, v[0]); - OUT_RING(chan, v[1]); + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; } -static void -emit_b32_3(struct nouveau_channel *chan, void *data) +static INLINE unsigned +prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index) { - uint32_t *v = data; - - OUT_RING(chan, v[0]); - OUT_RING(chan, v[1]); - OUT_RING(chan, v[2]); + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; } static void -emit_b32_4(struct nouveau_channel *chan, void *data) +emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) { - uint32_t *v = data; + uint8_t *elts = (uint8_t *)ctx->idxbuf + start; - OUT_RING(chan, v[0]); - OUT_RING(chan, 
v[1]); - OUT_RING(chan, v[2]); - OUT_RING(chan, v[3]); -} + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; -static void -emit_b16_1(struct nouveau_channel *chan, void *data) -{ - uint16_t *v = data; + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i08(elts, push, ctx->restart_index); - OUT_RING(chan, v[0]); -} + size = ctx->vertex_words * nr; -static void -emit_b16_3(struct nouveau_channel *chan, void *data) -{ - uint16_t *v = data; + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - OUT_RING(chan, (v[1] << 16) | v[0]); - OUT_RING(chan, v[2]); -} + ctx->translate->run_elts8(ctx->translate, elts, nr, ctx->instance_id, + ctx->chan->cur); -static void -emit_b08_1(struct nouveau_channel *chan, void *data) -{ - uint8_t *v = data; + ctx->chan->cur += size; + count -= nr; + elts += nr; - OUT_RING(chan, v[0]); + if (nr != push) { + count--; + elts++; + BEGIN_RING(ctx->chan, RING_3D(VB_ELEMENT_U32), 1); + OUT_RING (ctx->chan, ctx->restart_index); + } + } } static void -emit_b08_3(struct nouveau_channel *chan, void *data) +emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) { - uint8_t *v = data; + uint16_t *elts = (uint16_t *)ctx->idxbuf + start; - OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); -} + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; -static INLINE void -emit_vertex(struct push_context *ctx, unsigned n) -{ - struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; - int i; - - if (ctx->edgeflag_attr < 16) { - float *edgeflag = (float *) - ((uint8_t *)ctx->attr[ctx->edgeflag_attr].map + - ctx->attr[ctx->edgeflag_attr].stride * n); - - if (*edgeflag != ctx->edgeflag) { - BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (chan, *edgeflag ? 
1 : 0); - ctx->edgeflag = *edgeflag; - } - } + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i16(elts, push, ctx->restart_index); - BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size); - for (i = 0; i < ctx->attr_nr; i++) - ctx->attr[i].push(chan, - (uint8_t *)ctx->attr[i].map + ctx->attr[i].stride * n); -} + size = ctx->vertex_words * nr; -static void -emit_edgeflag(void *priv, boolean enabled) -{ - struct push_context *ctx = priv; - struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (chan, enabled ? 1 : 0); -} + ctx->translate->run_elts16(ctx->translate, elts, nr, ctx->instance_id, + ctx->chan->cur); -static void -emit_elt08(void *priv, unsigned start, unsigned count) -{ - struct push_context *ctx = priv; - uint8_t *idxbuf = ctx->idxbuf; + ctx->chan->cur += size; + count -= nr; + elts += nr; - while (count--) - emit_vertex(ctx, idxbuf[start++]); + if (nr != push) { + count--; + elts++; + BEGIN_RING(ctx->chan, RING_3D(VB_ELEMENT_U32), 1); + OUT_RING (ctx->chan, ctx->restart_index); + } + } } static void -emit_elt08_biased(void *priv, unsigned start, unsigned count) +emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) { - struct push_context *ctx = priv; - uint8_t *idxbuf = ctx->idxbuf; + uint32_t *elts = (uint32_t *)ctx->idxbuf + start; - while (count--) - emit_vertex(ctx, idxbuf[start++] + ctx->idxbias); -} + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; -static void -emit_elt16(void *priv, unsigned start, unsigned count) -{ - struct push_context *ctx = priv; - uint16_t *idxbuf = ctx->idxbuf; + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i32(elts, push, ctx->restart_index); - while (count--) - emit_vertex(ctx, idxbuf[start++]); -} + size = ctx->vertex_words * nr; 
-static void -emit_elt16_biased(void *priv, unsigned start, unsigned count) -{ - struct push_context *ctx = priv; - uint16_t *idxbuf = ctx->idxbuf; + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - while (count--) - emit_vertex(ctx, idxbuf[start++] + ctx->idxbias); -} + ctx->translate->run_elts(ctx->translate, elts, nr, ctx->instance_id, + ctx->chan->cur); -static void -emit_elt32(void *priv, unsigned start, unsigned count) -{ - struct push_context *ctx = priv; - uint32_t *idxbuf = ctx->idxbuf; + ctx->chan->cur += size; + count -= nr; + elts += nr; - while (count--) - emit_vertex(ctx, idxbuf[start++]); + if (nr != push) { + count--; + elts++; + BEGIN_RING(ctx->chan, RING_3D(VB_ELEMENT_U32), 1); + OUT_RING (ctx->chan, ctx->restart_index); + } + } } static void -emit_elt32_biased(void *priv, unsigned start, unsigned count) +emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) { - struct push_context *ctx = priv; - uint32_t *idxbuf = ctx->idxbuf; + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size = ctx->vertex_words * push; + + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - while (count--) - emit_vertex(ctx, idxbuf[start++] + ctx->idxbias); + ctx->translate->run(ctx->translate, start, push, ctx->instance_id, + ctx->chan->cur); + ctx->chan->cur += size; + count -= push; + start += push; + } } -static void -emit_verts(void *priv, unsigned start, unsigned count) + +#define NV50_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nv50_prim_gl(unsigned prim) { - while (count--) - emit_vertex(priv, start++); + switch (prim) { + NV50_PRIM_GL_CASE(POINTS); + NV50_PRIM_GL_CASE(LINES); + NV50_PRIM_GL_CASE(LINE_LOOP); + NV50_PRIM_GL_CASE(LINE_STRIP); + NV50_PRIM_GL_CASE(TRIANGLES); + NV50_PRIM_GL_CASE(TRIANGLE_STRIP); + NV50_PRIM_GL_CASE(TRIANGLE_FAN); + NV50_PRIM_GL_CASE(QUADS); + NV50_PRIM_GL_CASE(QUAD_STRIP); + NV50_PRIM_GL_CASE(POLYGON); 
+ NV50_PRIM_GL_CASE(LINES_ADJACENCY); + NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NV50_PRIM_GL_CASE(PATCHES); */ + default: + return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } } void -nv50_push_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *idxbuf, - unsigned idxsize, int idxbias, - unsigned mode, unsigned start, unsigned count, - unsigned i_start, unsigned i_count) +nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; struct push_context ctx; - const unsigned p_overhead = 4 + /* begin/end */ - 4; /* potential edgeflag enable/disable */ - const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */ - 2; /* potential edgeflag modification */ - struct util_split_prim s; - unsigned vtx_size; - boolean nzi = FALSE; - int i; - - ctx.nv50 = nv50; - ctx.attr_nr = 0; - ctx.idxbuf = NULL; - ctx.vtx_size = 0; - ctx.edgeflag = 0.5f; - ctx.edgeflag_attr = nv50->vertprog->vp.edgeflag; - - /* map vertex buffers, determine vertex size */ - for (i = 0; i < nv50->vtxelt->num_elements; i++) { - struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i]; - struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index]; - struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo; - unsigned size, nr_components, n; - - if (!(nv50->vbo_fifo & (1 << i))) - continue; - n = ctx.attr_nr++; - - if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) { - assert(bo->map); + unsigned i, index_size; + unsigned inst = info->instance_count; + boolean apply_bias = info->indexed && info->index_bias; + + ctx.chan = nv50->screen->base.channel; + ctx.translate = nv50->vertex->translate; + ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit; + ctx.vertex_words = nv50->vertex->vertex_size; + + for (i = 0; i 
< nv50->num_vtxbufs; ++i) { + uint8_t *data; + struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; + struct nv04_resource *res = nv04_resource(vb->buffer); + + data = nouveau_resource_map_offset(&nv50->base, res, + vb->buffer_offset, NOUVEAU_BO_RD); + + if (apply_bias && likely(!(nv50->vertex->instance_bufs & (1 << i)))) + data += info->index_bias * vb->stride; + + ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + } + + if (info->indexed) { + ctx.idxbuf = nouveau_resource_map_offset(&nv50->base, + nv04_resource(nv50->idxbuf.buffer), + nv50->idxbuf.offset, NOUVEAU_BO_RD); + if (!ctx.idxbuf) return; - } - ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset; - nouveau_bo_unmap(bo); - - ctx.attr[n].stride = vb->stride; - ctx.attr[n].divisor = ve->instance_divisor; - if (ctx.attr[n].divisor) { - ctx.attr[n].step = i_start % ve->instance_divisor; - ctx.attr[n].map = (uint8_t *)ctx.attr[n].map + i_start * vb->stride; - } + index_size = nv50->idxbuf.index_size; + ctx.primitive_restart = info->primitive_restart; + ctx.restart_index = info->restart_index; + } else { + ctx.idxbuf = NULL; + index_size = 0; + ctx.primitive_restart = FALSE; + ctx.restart_index = 0; + } + + ctx.instance_id = info->start_instance; + ctx.prim = nv50_prim_gl(info->mode); - size = util_format_get_component_bits(ve->src_format, - UTIL_FORMAT_COLORSPACE_RGB, 0); - nr_components = util_format_get_nr_components(ve->src_format); - switch (size) { - case 8: - switch (nr_components) { - case 1: ctx.attr[n].push = emit_b08_1; break; - case 2: ctx.attr[n].push = emit_b16_1; break; - case 3: ctx.attr[n].push = emit_b08_3; break; - case 4: ctx.attr[n].push = emit_b32_1; break; - } - ctx.vtx_size++; + if (info->primitive_restart) { + BEGIN_RING(ctx.chan, RING_3D(PRIM_RESTART_ENABLE), 2); + OUT_RING (ctx.chan, 1); + OUT_RING (ctx.chan, info->restart_index); + } else + if (nv50->state.prim_restart) { + BEGIN_RING(ctx.chan, RING_3D(PRIM_RESTART_ENABLE), 1); + OUT_RING 
(ctx.chan, 0); + } + nv50->state.prim_restart = info->primitive_restart; + + while (inst--) { + BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (ctx.chan, ctx.prim); + switch (index_size) { + case 0: + emit_vertices_seq(&ctx, info->start, info->count); + break; + case 1: + emit_vertices_i08(&ctx, info->start, info->count); break; - case 16: - switch (nr_components) { - case 1: ctx.attr[n].push = emit_b16_1; break; - case 2: ctx.attr[n].push = emit_b32_1; break; - case 3: ctx.attr[n].push = emit_b16_3; break; - case 4: ctx.attr[n].push = emit_b32_2; break; - } - ctx.vtx_size += (nr_components + 1) >> 1; + case 2: + emit_vertices_i16(&ctx, info->start, info->count); break; - case 32: - switch (nr_components) { - case 1: ctx.attr[n].push = emit_b32_1; break; - case 2: ctx.attr[n].push = emit_b32_2; break; - case 3: ctx.attr[n].push = emit_b32_3; break; - case 4: ctx.attr[n].push = emit_b32_4; break; - } - ctx.vtx_size += nr_components; + case 4: + emit_vertices_i32(&ctx, info->start, info->count); break; default: assert(0); - return; + break; } - } - vtx_size = ctx.vtx_size + v_overhead; + BEGIN_RING(ctx.chan, RING_3D(VERTEX_END_GL), 1); + OUT_RING (ctx.chan, 0); - /* map index buffer, if present */ - if (idxbuf) { - struct nouveau_bo *bo = nv50_resource(idxbuf)->bo; - - if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) { - assert(bo->map); - return; - } - ctx.idxbuf = bo->map; - ctx.idxbias = idxbias; - ctx.idxsize = idxsize; - nouveau_bo_unmap(bo); + ctx.instance_id++; + ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } - s.priv = &ctx; - s.edge = emit_edgeflag; - if (idxbuf) { - if (idxsize == 1) - s.emit = idxbias ? emit_elt08_biased : emit_elt08; - else - if (idxsize == 2) - s.emit = idxbias ? emit_elt16_biased : emit_elt16; - else - s.emit = idxbias ? 
emit_elt32_biased : emit_elt32; - } else - s.emit = emit_verts; - - /* per-instance loop */ - BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); - OUT_RING (chan, NV50_CB_AUX | (24 << 8)); - OUT_RING (chan, i_start); - while (i_count--) { - unsigned max_verts; - boolean done; - - for (i = 0; i < ctx.attr_nr; i++) { - if (!ctx.attr[i].divisor || - ctx.attr[i].divisor != ++ctx.attr[i].step) - continue; - ctx.attr[i].step = 0; - ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride; - } + if (info->indexed) + nouveau_resource_unmap(nv04_resource(nv50->idxbuf.buffer)); - util_split_prim_init(&s, mode, start, count); - do { - if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) { - FIRE_RING(chan); - if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) { - assert(0); - return; - } - } - - max_verts = AVAIL_RING(chan); - max_verts -= p_overhead; - max_verts /= vtx_size; - - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0)); - done = util_split_prim_next(&s, max_verts); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (chan, 0); - } while (!done); - - nzi = TRUE; - } + for (i = 0; i < nv50->num_vtxbufs; ++i) + nouveau_resource_unmap(nv04_resource(nv50->vtxbuf[i].buffer)); } diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 53f94820ce..2dce94a477 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Ben Skeggs + * Copyright 2011 Nouveau Project * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,150 +18,320 @@ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
+ * + * Authors: Christoph Bumiller */ -#include "pipe/p_context.h" -#include "util/u_inlines.h" - #include "nv50_context.h" +#include "nouveau/nv_object.xml.h" + +/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts + * (since we use only a single GPU channel per screen) will not work properly. + * + * The first is not that big of an issue because OpenGL does not allow nested + * queries anyway. + */ struct nv50_query { - struct nouveau_bo *bo; - unsigned type; - boolean ready; - uint64_t result; + uint32_t *data; + uint32_t type; + uint32_t sequence; + struct nouveau_bo *bo; + uint32_t base; + uint32_t offset; /* base + i * 16 */ + boolean ready; + boolean is64bit; + struct nouveau_mm_allocation *mm; }; +#define NV50_QUERY_ALLOC_SPACE 128 + static INLINE struct nv50_query * nv50_query(struct pipe_query *pipe) { - return (struct nv50_query *)pipe; + return (struct nv50_query *)pipe; +} + +static boolean +nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) +{ + struct nv50_screen *screen = nv50->screen; + int ret; + + if (q->bo) { + nouveau_bo_ref(NULL, &q->bo); + if (q->mm) { + if (q->ready) + nouveau_mm_free(q->mm); + else + nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, q->mm); + } + } + if (size) { + q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base); + if (!q->bo) + return FALSE; + q->offset = q->base; + + ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD | + NOUVEAU_BO_NOSYNC); + if (ret) { + nv50_query_allocate(nv50, q, 0); + return FALSE; + } + q->data = q->bo->map; + nouveau_bo_unmap(q->bo); + } + return TRUE; +} + +static void +nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); + FREE(nv50_query(pq)); } static struct pipe_query * nv50_query_create(struct pipe_context *pipe, unsigned type) { - struct nouveau_device *dev = nouveau_screen(pipe->screen)->device; - 
struct nv50_query *q = CALLOC_STRUCT(nv50_query); - int ret; + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_query *q; + + q = CALLOC_STRUCT(nv50_query); + if (!q) + return NULL; - assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER); - q->type = type; + if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) { + FREE(q); + return NULL; + } - ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 256, - 16, &q->bo); - if (ret) { - FREE(q); - return NULL; - } + q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || + type == PIPE_QUERY_PRIMITIVES_EMITTED || + type == PIPE_QUERY_SO_STATISTICS); + q->type = type; - return (struct pipe_query *)q; + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + q->offset -= 16; + q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */ + } + + return (struct pipe_query *)q; } static void -nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +nv50_query_get(struct nouveau_channel *chan, struct nv50_query *q, + unsigned offset, uint32_t get) { - struct nv50_query *q = nv50_query(pq); + offset += q->offset; - if (q) { - nouveau_bo_ref(NULL, &q->bo); - FREE(q); - } + MARK_RING (chan, 5, 2); + BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (chan, q->sequence); + OUT_RING (chan, get); } static void nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nv50_query *q = nv50_query(pq); + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nv50_query *q = nv50_query(pq); + + /* For occlusion queries we have to change the storage, because a previous + * query might set the initial render condition
to FALSE even *after* we re- + * initialized it to TRUE. + */ + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + q->offset += 16; + q->data += 16 / sizeof(*q->data); + if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE) + nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE); - BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1); - OUT_RING (chan, 1); + /* XXX: can we do this with the GPU, and sync with respect to a previous + * query ? + */ + q->data[1] = 1; /* initial render condition = TRUE */ + } + if (!q->is64bit) + q->data[0] = q->sequence++; /* the previously used one */ - q->ready = FALSE; + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1); + OUT_RING (chan, NV50_3D_COUNTER_RESET_SAMPLECNT); + BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); + OUT_RING (chan, 1); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */ + BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1); + OUT_RING (chan, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1); + OUT_RING (chan, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK); + break; + case PIPE_QUERY_SO_STATISTICS: + BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2); + OUT_RING (chan, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK); + OUT_RING (chan, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_TIME_ELAPSED: + nv50_query_get(chan, q, 0x10, 0x00005002); + break; + default: + break; + } + q->ready = FALSE; } static void nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nv50_query *q = nv50_query(pq); - - MARK_RING (chan, 5, 2); /* flush on lack of space or 
relocs */ - BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4); - OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, 0x00000000); - OUT_RING (chan, 0x0100f002); - - BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1); - OUT_RING (chan, 0); + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nv50_query *q = nv50_query(pq); + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + nv50_query_get(chan, q, 0, 0x0100f002); + BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); + OUT_RING (chan, 0); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_query_get(chan, q, 0, 0x06805002); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nv50_query_get(chan, q, 0, 0x05805002); + break; + case PIPE_QUERY_SO_STATISTICS: + nv50_query_get(chan, q, 0x00, 0x05805002); + nv50_query_get(chan, q, 0x10, 0x06805002); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_TIME_ELAPSED: + nv50_query_get(chan, q, 0, 0x00005002); + break; + case PIPE_QUERY_GPU_FINISHED: + nv50_query_get(chan, q, 0, 0x1000f010); + break; + default: + assert(0); + break; + } +} + +static INLINE boolean +nv50_query_ready(struct nv50_query *q) +{ + return q->ready || (!q->is64bit && (q->data[0] == q->sequence)); +} + +static INLINE boolean +nv50_query_wait(struct nv50_query *q) +{ + int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD); + if (ret) + return FALSE; + nouveau_bo_unmap(q->bo); + return TRUE; } static boolean nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, void *vresult) + boolean wait, void *result) { - uint64_t *result = (uint64_t*)vresult; - struct nv50_query *q = nv50_query(pq); - int ret; - - if (!q->ready) { - ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD | - (wait ? 
0 : NOUVEAU_BO_NOWAIT)); - if (ret) - return false; - q->result = ((uint32_t *)q->bo->map)[1]; - q->ready = TRUE; - nouveau_bo_unmap(q->bo); - } - - *result = q->result; - return q->ready; + struct nv50_query *q = nv50_query(pq); + uint64_t *res64 = result; + boolean *res8 = result; + uint64_t *data64 = (uint64_t *)q->data; + + if (q->type == PIPE_QUERY_GPU_FINISHED) { + res8[0] = nv50_query_ready(q); + return TRUE; + } + + if (!q->ready) /* update ? */ + q->ready = nv50_query_ready(q); + if (!q->ready) { + struct nouveau_channel *chan = nv50_context(pipe)->screen->base.channel; + if (!wait) { + if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */ + FIRE_RING(chan); + return FALSE; + } + if (!nv50_query_wait(q)) + return FALSE; + } + q->ready = TRUE; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ + res64[0] = q->data[1]; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ + res64[0] = data64[0]; + break; + case PIPE_QUERY_SO_STATISTICS: + res64[0] = data64[0]; + res64[1] = data64[1]; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */ + res64[0] = 1000000000; + res8[8] = (data64[0] == data64[2]) ? 
FALSE : TRUE; + break; + case PIPE_QUERY_TIME_ELAPSED: + res64[0] = data64[1] - data64[3]; + break; + default: + return FALSE; + } + + return TRUE; } static void nv50_render_condition(struct pipe_context *pipe, - struct pipe_query *pq, uint mode) + struct pipe_query *pq, uint mode) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nv50_query *q; - - if (!pq) { - BEGIN_RING(chan, tesla, NV50TCL_COND_MODE, 1); - OUT_RING (chan, NV50TCL_COND_MODE_ALWAYS); - return; - } - q = nv50_query(pq); - - if (mode == PIPE_RENDER_COND_WAIT || - mode == PIPE_RENDER_COND_BY_REGION_WAIT) { - /* XXX: big fence, FIFO semaphore might be better */ - BEGIN_RING(chan, tesla, 0x0110, 1); - OUT_RING (chan, 0); - } - - BEGIN_RING(chan, tesla, NV50TCL_COND_ADDRESS_HIGH, 3); - OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RING (chan, NV50TCL_COND_MODE_RES); + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nv50_query *q; + + if (!pq) { + BEGIN_RING(chan, RING_3D(COND_MODE), 1); + OUT_RING (chan, NV50_3D_COND_MODE_ALWAYS); + return; + } + q = nv50_query(pq); + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + BEGIN_RING(chan, RING_3D_(NV50_GRAPH_WAIT_FOR_IDLE), 1); + OUT_RING (chan, 0); + } + + MARK_RING (chan, 4, 2); + BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RING (chan, NV50_3D_COND_MODE_RES_NON_ZERO); } void nv50_init_query_functions(struct nv50_context *nv50) { - nv50->pipe.create_query = nv50_query_create; - nv50->pipe.destroy_query = nv50_query_destroy; - nv50->pipe.begin_query = nv50_query_begin; - nv50->pipe.end_query = 
nv50_query_end; - nv50->pipe.get_query_result = nv50_query_result; - nv50->pipe.render_condition = nv50_render_condition; + struct pipe_context *pipe = &nv50->base.pipe; + + pipe->create_query = nv50_query_create; + pipe->destroy_query = nv50_query_destroy; + pipe->begin_query = nv50_query_begin; + pipe->end_query = nv50_query_end; + pipe->get_query_result = nv50_query_result; + pipe->render_condition = nv50_render_condition; } diff --git a/src/gallium/drivers/nv50/nv50_reg.h b/src/gallium/drivers/nv50/nv50_reg.h deleted file mode 100644 index 949838b33f..0000000000 --- a/src/gallium/drivers/nv50/nv50_reg.h +++ /dev/null @@ -1,1827 +0,0 @@ -/************************************************************************* - - Autogenerated file, do not edit ! - - This file was generated by renouveau-gen from renouveau.xml, the - XML database of nvidia objects and methods. renouveau-gen and - renouveau.xml can be found in CVS module renouveau of sourceforge.net - project nouveau: - -cvs -z3 -d:pserver:anonymous@nouveau.cvs.sourceforge.net:/cvsroot/nouveau co -P renouveau - -************************************************************************** - - Copyright (C) 2006-2008 : - Dmitry Baryshkov, - Laurent Carlier, - Matthieu Castet, - Dawid Gajownik, - Jeremy Kolb, - Stephane Loeuillet, - Patrice Mandin, - Stephane Marchesin, - Serge Martin, - Sylvain Munaut, - Simon Raffeiner, - Ben Skeggs, - Erik Waling, - koala_br, - -All Rights Reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- -*************************************************************************/ - - -#ifndef NOUVEAU_REG_H -#define NOUVEAU_REG_H 1 - - -#define NV04_MEMORY_TO_MEMORY_FORMAT 0x00000039 - -#define NV04_MEMORY_TO_MEMORY_FORMAT_NOP 0x00000100 -#define NV04_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104 -#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY 0x00000180 -#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN 0x00000184 -#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_OUT 0x00000188 -#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c -#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT 0x00000310 -#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN 0x00000314 -#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT 0x00000318 -#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN 0x0000031c -#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT 0x00000320 -#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT 0x00000324 -#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_SHIFT 0 -#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_MASK 0x000000ff -#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_SHIFT 8 -#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_MASK 0x0000ff00 -#define NV04_MEMORY_TO_MEMORY_FORMAT_BUF_NOTIFY 0x00000328 - - -#define NV50_MEMORY_TO_MEMORY_FORMAT 0x00005039 - -#define NV50_MEMORY_TO_MEMORY_FORMAT_SERIALIZE 0x00000110 -#define NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN 0x00000200 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_IN 0x00000204 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_IN 0x00000208 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_IN 0x0000020c -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_IN 0x00000210 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Z 0x00000214 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN 0x00000218 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_X_SHIFT 0 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_X_MASK 0x0000ffff -#define 
NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Y_SHIFT 16 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Y_MASK 0xffff0000 -#define NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT 0x0000021c -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_OUT 0x00000220 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_OUT 0x00000224 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_OUT 0x00000228 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_OUT 0x0000022c -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Z 0x00000230 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT 0x00000234 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_X_SHIFT 0 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_X_MASK 0x0000ffff -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Y_SHIFT 16 -#define NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Y_MASK 0xffff0000 -#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH 0x00000238 -#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x0000023c - - -#define NV50_2D 0x0000502d - -#define NV50_2D_NOP 0x00000100 -#define NV50_2D_NOTIFY 0x00000104 -#define NV50_2D_SERIALIZE 0x00000110 -#define NV50_2D_DMA_NOTIFY 0x00000180 -#define NV50_2D_DMA_DST 0x00000184 -#define NV50_2D_DMA_SRC 0x00000188 -#define NV50_2D_DMA_COND 0x0000018c -#define NV50_2D_DST_FORMAT 0x00000200 -#define NV50_2D_DST_FORMAT_R32G32B32A32_FLOAT 0x000000c0 -#define NV50_2D_DST_FORMAT_R32G32B32A32_SINT 0x000000c1 -#define NV50_2D_DST_FORMAT_R32G32B32A32_UINT 0x000000c2 -#define NV50_2D_DST_FORMAT_R32G32B32X32_FLOAT 0x000000c3 -#define NV50_2D_DST_FORMAT_R16G16B16A16_UNORM 0x000000c6 -#define NV50_2D_DST_FORMAT_R16G16B16A16_SNORM 0x000000c7 -#define NV50_2D_DST_FORMAT_R16G16B16A16_SINT 0x000000c8 -#define NV50_2D_DST_FORMAT_R16G16B16A16_UINT 0x000000c9 -#define NV50_2D_DST_FORMAT_R16G16B16A16_FLOAT 0x000000ca -#define NV50_2D_DST_FORMAT_R32G32_FLOAT 0x000000cb -#define NV50_2D_DST_FORMAT_R32G32_SINT 0x000000cc -#define 
NV50_2D_DST_FORMAT_R32G32_UINT 0x000000cd -#define NV50_2D_DST_FORMAT_R16G16B16X16_FLOAT 0x000000ce -#define NV50_2D_DST_FORMAT_A8R8G8B8_UNORM 0x000000cf -#define NV50_2D_DST_FORMAT_A8R8G8B8_SRGB 0x000000d0 -#define NV50_2D_DST_FORMAT_A2B10G10R10_UNORM 0x000000d1 -#define NV50_2D_DST_FORMAT_A2B10G10R10_UINT 0x000000d2 -#define NV50_2D_DST_FORMAT_A8B8G8R8_UNORM 0x000000d5 -#define NV50_2D_DST_FORMAT_A8B8G8R8_SRGB 0x000000d6 -#define NV50_2D_DST_FORMAT_A8B8G8R8_SNORM 0x000000d7 -#define NV50_2D_DST_FORMAT_A8B8G8R8_SINT 0x000000d8 -#define NV50_2D_DST_FORMAT_A8B8G8R8_UINT 0x000000d9 -#define NV50_2D_DST_FORMAT_R16G16_UNORM 0x000000da -#define NV50_2D_DST_FORMAT_R16G16_SNORM 0x000000db -#define NV50_2D_DST_FORMAT_R16G16_SINT 0x000000dc -#define NV50_2D_DST_FORMAT_R16G16_UINT 0x000000dd -#define NV50_2D_DST_FORMAT_R16G16_FLOAT 0x000000de -#define NV50_2D_DST_FORMAT_A2R10G10B10_UNORM 0x000000df -#define NV50_2D_DST_FORMAT_B10G11R11_FLOAT 0x000000e0 -#define NV50_2D_DST_FORMAT_R32_FLOAT 0x000000e5 -#define NV50_2D_DST_FORMAT_X8R8G8B8_UNORM 0x000000e6 -#define NV50_2D_DST_FORMAT_X8R8G8B8_SRGB 0x000000e7 -#define NV50_2D_DST_FORMAT_R5G6B5_UNORM 0x000000e8 -#define NV50_2D_DST_FORMAT_A1R5G5B5_UNORM 0x000000e9 -#define NV50_2D_DST_FORMAT_R8G8_UNORM 0x000000ea -#define NV50_2D_DST_FORMAT_R8G8_SNORM 0x000000eb -#define NV50_2D_DST_FORMAT_R8G8_SINT 0x000000ec -#define NV50_2D_DST_FORMAT_R8G8_UINT 0x000000ed -#define NV50_2D_DST_FORMAT_R16_UNORM 0x000000ee -#define NV50_2D_DST_FORMAT_R16_SNORM 0x000000ef -#define NV50_2D_DST_FORMAT_R16_SINT 0x000000f0 -#define NV50_2D_DST_FORMAT_R16_UINT 0x000000f1 -#define NV50_2D_DST_FORMAT_R16_FLOAT 0x000000f2 -#define NV50_2D_DST_FORMAT_R8_UNORM 0x000000f3 -#define NV50_2D_DST_FORMAT_R8_SNORM 0x000000f4 -#define NV50_2D_DST_FORMAT_R8_SINT 0x000000f5 -#define NV50_2D_DST_FORMAT_R8_UINT 0x000000f6 -#define NV50_2D_DST_FORMAT_A8_UNORM 0x000000f7 -#define NV50_2D_DST_FORMAT_X1R5G5B5_UNORM 0x000000f8 -#define NV50_2D_DST_FORMAT_X8B8G8R8_UNORM 
0x000000f9 -#define NV50_2D_DST_FORMAT_X8B8G8R8_SRGB 0x000000fa -#define NV50_2D_DST_LINEAR 0x00000204 -#define NV50_2D_DST_TILE_MODE 0x00000208 -#define NV50_2D_DST_DEPTH 0x0000020c -#define NV50_2D_DST_LAYER 0x00000210 -#define NV50_2D_DST_PITCH 0x00000214 -#define NV50_2D_DST_WIDTH 0x00000218 -#define NV50_2D_DST_HEIGHT 0x0000021c -#define NV50_2D_DST_ADDRESS_HIGH 0x00000220 -#define NV50_2D_DST_ADDRESS_LOW 0x00000224 -#define NV50_2D_SRC_FORMAT 0x00000230 -#define NV50_2D_SRC_FORMAT_R32G32B32A32_FLOAT 0x000000c0 -#define NV50_2D_SRC_FORMAT_R32G32B32A32_SINT 0x000000c1 -#define NV50_2D_SRC_FORMAT_R32G32B32A32_UINT 0x000000c2 -#define NV50_2D_SRC_FORMAT_R32G32B32X32_FLOAT 0x000000c3 -#define NV50_2D_SRC_FORMAT_R16G16B16A16_UNORM 0x000000c6 -#define NV50_2D_SRC_FORMAT_R16G16B16A16_SNORM 0x000000c7 -#define NV50_2D_SRC_FORMAT_R16G16B16A16_SINT 0x000000c8 -#define NV50_2D_SRC_FORMAT_R16G16B16A16_UINT 0x000000c9 -#define NV50_2D_SRC_FORMAT_R16G16B16A16_FLOAT 0x000000ca -#define NV50_2D_SRC_FORMAT_R32G32_FLOAT 0x000000cb -#define NV50_2D_SRC_FORMAT_R32G32_SINT 0x000000cc -#define NV50_2D_SRC_FORMAT_R32G32_UINT 0x000000cd -#define NV50_2D_SRC_FORMAT_R16G16B16X16_FLOAT 0x000000ce -#define NV50_2D_SRC_FORMAT_A8R8G8B8_UNORM 0x000000cf -#define NV50_2D_SRC_FORMAT_A8R8G8B8_SRGB 0x000000d0 -#define NV50_2D_SRC_FORMAT_A2B10G10R10_UNORM 0x000000d1 -#define NV50_2D_SRC_FORMAT_A2B10G10R10_UINT 0x000000d2 -#define NV50_2D_SRC_FORMAT_A8B8G8R8_UNORM 0x000000d5 -#define NV50_2D_SRC_FORMAT_A8B8G8R8_SRGB 0x000000d6 -#define NV50_2D_SRC_FORMAT_A8B8G8R8_SNORM 0x000000d7 -#define NV50_2D_SRC_FORMAT_A8B8G8R8_SINT 0x000000d8 -#define NV50_2D_SRC_FORMAT_A8B8G8R8_UINT 0x000000d9 -#define NV50_2D_SRC_FORMAT_R16G16_UNORM 0x000000da -#define NV50_2D_SRC_FORMAT_R16G16_SNORM 0x000000db -#define NV50_2D_SRC_FORMAT_R16G16_SINT 0x000000dc -#define NV50_2D_SRC_FORMAT_R16G16_UINT 0x000000dd -#define NV50_2D_SRC_FORMAT_R16G16_FLOAT 0x000000de -#define NV50_2D_SRC_FORMAT_A2R10G10B10_UNORM 0x000000df 
-#define NV50_2D_SRC_FORMAT_B10G11R11_FLOAT 0x000000e0 -#define NV50_2D_SRC_FORMAT_R32_FLOAT 0x000000e5 -#define NV50_2D_SRC_FORMAT_X8R8G8B8_UNORM 0x000000e6 -#define NV50_2D_SRC_FORMAT_X8R8G8B8_SRGB 0x000000e7 -#define NV50_2D_SRC_FORMAT_R5G6B5_UNORM 0x000000e8 -#define NV50_2D_SRC_FORMAT_A1R5G5B5_UNORM 0x000000e9 -#define NV50_2D_SRC_FORMAT_R8G8_UNORM 0x000000ea -#define NV50_2D_SRC_FORMAT_R8G8_SNORM 0x000000eb -#define NV50_2D_SRC_FORMAT_R8G8_SINT 0x000000ec -#define NV50_2D_SRC_FORMAT_R8G8_UINT 0x000000ed -#define NV50_2D_SRC_FORMAT_R16_UNORM 0x000000ee -#define NV50_2D_SRC_FORMAT_R16_SNORM 0x000000ef -#define NV50_2D_SRC_FORMAT_R16_SINT 0x000000f0 -#define NV50_2D_SRC_FORMAT_R16_UINT 0x000000f1 -#define NV50_2D_SRC_FORMAT_R16_FLOAT 0x000000f2 -#define NV50_2D_SRC_FORMAT_R8_UNORM 0x000000f3 -#define NV50_2D_SRC_FORMAT_R8_SNORM 0x000000f4 -#define NV50_2D_SRC_FORMAT_R8_SINT 0x000000f5 -#define NV50_2D_SRC_FORMAT_R8_UINT 0x000000f6 -#define NV50_2D_SRC_FORMAT_A8_UNORM 0x000000f7 -#define NV50_2D_SRC_FORMAT_X1R5G5B5_UNORM 0x000000f8 -#define NV50_2D_SRC_FORMAT_X8B8G8R8_UNORM 0x000000f9 -#define NV50_2D_SRC_FORMAT_X8B8G8R8_SRGB 0x000000fa -#define NV50_2D_SRC_LINEAR 0x00000234 -#define NV50_2D_SRC_TILE_MODE 0x00000238 -#define NV50_2D_SRC_DEPTH 0x0000023c -#define NV50_2D_SRC_LAYER 0x00000240 -#define NV50_2D_SRC_PITCH 0x00000244 -#define NV50_2D_SRC_WIDTH 0x00000248 -#define NV50_2D_SRC_HEIGHT 0x0000024c -#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250 -#define NV50_2D_SRC_ADDRESS_LOW 0x00000254 -#define NV50_2D_COND_ADDRESS_HIGH 0x00000264 -#define NV50_2D_COND_ADDRESS_LOW 0x00000268 -#define NV50_2D_COND_MODE 0x0000026c -#define NV50_2D_COND_MODE_NEVER 0x00000000 -#define NV50_2D_COND_MODE_ALWAYS 0x00000001 -#define NV50_2D_COND_MODE_RES 0x00000002 -#define NV50_2D_COND_MODE_NOT_RES_AND_NOT_ID 0x00000003 -#define NV50_2D_COND_MODE_RES_OR_ID 0x00000004 -#define NV50_2D_CLIP_X 0x00000280 -#define NV50_2D_CLIP_Y 0x00000284 -#define NV50_2D_CLIP_W 0x00000288 -#define 
NV50_2D_CLIP_H 0x0000028c -#define NV50_2D_CLIP_ENABLE 0x00000290 -#define NV50_2D_COLOR_KEY_FORMAT 0x00000294 -#define NV50_2D_COLOR_KEY_FORMAT_16BPP 0x00000000 -#define NV50_2D_COLOR_KEY_FORMAT_15BPP 0x00000001 -#define NV50_2D_COLOR_KEY_FORMAT_24BPP 0x00000002 -#define NV50_2D_COLOR_KEY_FORMAT_30BPP 0x00000003 -#define NV50_2D_COLOR_KEY_FORMAT_8BPP 0x00000004 -#define NV50_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005 -#define NV50_2D_COLOR_KEY_FORMAT_32BPP 0x00000006 -#define NV50_2D_COLOR_KEY 0x00000298 -#define NV50_2D_COLOR_KEY_ENABLE 0x0000029c -#define NV50_2D_ROP 0x000002a0 -#define NV50_2D_OPERATION 0x000002ac -#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000 -#define NV50_2D_OPERATION_ROP_AND 0x00000001 -#define NV50_2D_OPERATION_BLEND_AND 0x00000002 -#define NV50_2D_OPERATION_SRCCOPY 0x00000003 -#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000004 -#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000005 -#define NV50_2D_PATTERN_FORMAT 0x000002e8 -#define NV50_2D_PATTERN_FORMAT_16BPP 0x00000000 -#define NV50_2D_PATTERN_FORMAT_15BPP 0x00000001 -#define NV50_2D_PATTERN_FORMAT_32BPP 0x00000002 -#define NV50_2D_PATTERN_FORMAT_8BPP 0x00000003 -#define NV50_2D_PATTERN_COLOR(x) (0x000002f0+((x)*4)) -#define NV50_2D_PATTERN_COLOR__SIZE 0x00000002 -#define NV50_2D_PATTERN_BITMAP(x) (0x000002f8+((x)*4)) -#define NV50_2D_PATTERN_BITMAP__SIZE 0x00000002 -#define NV50_2D_DRAW_SHAPE 0x00000580 -#define NV50_2D_DRAW_SHAPE_POINTS 0x00000000 -#define NV50_2D_DRAW_SHAPE_LINES 0x00000001 -#define NV50_2D_DRAW_SHAPE_LINE_STRIP 0x00000002 -#define NV50_2D_DRAW_SHAPE_TRIANGLES 0x00000003 -#define NV50_2D_DRAW_SHAPE_RECTANGLES 0x00000004 -#define NV50_2D_DRAW_COLOR_FORMAT 0x00000584 -#define NV50_2D_DRAW_COLOR_FORMAT_R32G32B32A32_FLOAT 0x000000c0 -#define NV50_2D_DRAW_COLOR_FORMAT_R32G32B32A32_SINT 0x000000c1 -#define NV50_2D_DRAW_COLOR_FORMAT_R32G32B32A32_UINT 0x000000c2 -#define NV50_2D_DRAW_COLOR_FORMAT_R32G32B32X32_FLOAT 0x000000c3 -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16A16_UNORM 
0x000000c6 -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16A16_SNORM 0x000000c7 -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16A16_SINT 0x000000c8 -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16A16_UINT 0x000000c9 -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16A16_FLOAT 0x000000ca -#define NV50_2D_DRAW_COLOR_FORMAT_R32G32_FLOAT 0x000000cb -#define NV50_2D_DRAW_COLOR_FORMAT_R32G32_SINT 0x000000cc -#define NV50_2D_DRAW_COLOR_FORMAT_R32G32_UINT 0x000000cd -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16B16X16_FLOAT 0x000000ce -#define NV50_2D_DRAW_COLOR_FORMAT_A8R8G8B8_UNORM 0x000000cf -#define NV50_2D_DRAW_COLOR_FORMAT_A8R8G8B8_SRGB 0x000000d0 -#define NV50_2D_DRAW_COLOR_FORMAT_A2B10G10R10_UNORM 0x000000d1 -#define NV50_2D_DRAW_COLOR_FORMAT_A2B10G10R10_UINT 0x000000d2 -#define NV50_2D_DRAW_COLOR_FORMAT_A8B8G8R8_UNORM 0x000000d5 -#define NV50_2D_DRAW_COLOR_FORMAT_A8B8G8R8_SRGB 0x000000d6 -#define NV50_2D_DRAW_COLOR_FORMAT_A8B8G8R8_SNORM 0x000000d7 -#define NV50_2D_DRAW_COLOR_FORMAT_A8B8G8R8_SINT 0x000000d8 -#define NV50_2D_DRAW_COLOR_FORMAT_A8B8G8R8_UINT 0x000000d9 -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16_UNORM 0x000000da -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16_SNORM 0x000000db -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16_SINT 0x000000dc -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16_UINT 0x000000dd -#define NV50_2D_DRAW_COLOR_FORMAT_R16G16_FLOAT 0x000000de -#define NV50_2D_DRAW_COLOR_FORMAT_A2R10G10B10_UNORM 0x000000df -#define NV50_2D_DRAW_COLOR_FORMAT_B10G11R11_FLOAT 0x000000e0 -#define NV50_2D_DRAW_COLOR_FORMAT_R32_FLOAT 0x000000e5 -#define NV50_2D_DRAW_COLOR_FORMAT_X8R8G8B8_UNORM 0x000000e6 -#define NV50_2D_DRAW_COLOR_FORMAT_X8R8G8B8_SRGB 0x000000e7 -#define NV50_2D_DRAW_COLOR_FORMAT_R5G6B5_UNORM 0x000000e8 -#define NV50_2D_DRAW_COLOR_FORMAT_A1R5G5B5_UNORM 0x000000e9 -#define NV50_2D_DRAW_COLOR_FORMAT_R8G8_UNORM 0x000000ea -#define NV50_2D_DRAW_COLOR_FORMAT_R8G8_SNORM 0x000000eb -#define NV50_2D_DRAW_COLOR_FORMAT_R8G8_SINT 0x000000ec -#define NV50_2D_DRAW_COLOR_FORMAT_R8G8_UINT 
0x000000ed -#define NV50_2D_DRAW_COLOR_FORMAT_R16_UNORM 0x000000ee -#define NV50_2D_DRAW_COLOR_FORMAT_R16_SNORM 0x000000ef -#define NV50_2D_DRAW_COLOR_FORMAT_R16_SINT 0x000000f0 -#define NV50_2D_DRAW_COLOR_FORMAT_R16_UINT 0x000000f1 -#define NV50_2D_DRAW_COLOR_FORMAT_R16_FLOAT 0x000000f2 -#define NV50_2D_DRAW_COLOR_FORMAT_R8_UNORM 0x000000f3 -#define NV50_2D_DRAW_COLOR_FORMAT_R8_SNORM 0x000000f4 -#define NV50_2D_DRAW_COLOR_FORMAT_R8_SINT 0x000000f5 -#define NV50_2D_DRAW_COLOR_FORMAT_R8_UINT 0x000000f6 -#define NV50_2D_DRAW_COLOR_FORMAT_A8_UNORM 0x000000f7 -#define NV50_2D_DRAW_COLOR_FORMAT_X1R5G5B5_UNORM 0x000000f8 -#define NV50_2D_DRAW_COLOR_FORMAT_X8B8G8R8_UNORM 0x000000f9 -#define NV50_2D_DRAW_COLOR_FORMAT_X8B8G8R8_SRGB 0x000000fa -#define NV50_2D_DRAW_COLOR 0x00000588 -#define NV50_2D_DRAW_POINT16 0x000005e0 -#define NV50_2D_DRAW_POINT16_X_SHIFT 0 -#define NV50_2D_DRAW_POINT16_X_MASK 0x0000ffff -#define NV50_2D_DRAW_POINT16_Y_SHIFT 16 -#define NV50_2D_DRAW_POINT16_Y_MASK 0xffff0000 -#define NV50_2D_DRAW_POINT32_X(x) (0x00000600+((x)*8)) -#define NV50_2D_DRAW_POINT32_X__SIZE 0x00000040 -#define NV50_2D_DRAW_POINT32_Y(x) (0x00000604+((x)*8)) -#define NV50_2D_DRAW_POINT32_Y__SIZE 0x00000040 -#define NV50_2D_SIFC_BITMAP_ENABLE 0x00000800 -#define NV50_2D_SIFC_FORMAT 0x00000804 -#define NV50_2D_SIFC_FORMAT_R32G32B32A32_FLOAT 0x000000c0 -#define NV50_2D_SIFC_FORMAT_R32G32B32A32_SINT 0x000000c1 -#define NV50_2D_SIFC_FORMAT_R32G32B32A32_UINT 0x000000c2 -#define NV50_2D_SIFC_FORMAT_R32G32B32X32_FLOAT 0x000000c3 -#define NV50_2D_SIFC_FORMAT_R16G16B16A16_UNORM 0x000000c6 -#define NV50_2D_SIFC_FORMAT_R16G16B16A16_SNORM 0x000000c7 -#define NV50_2D_SIFC_FORMAT_R16G16B16A16_SINT 0x000000c8 -#define NV50_2D_SIFC_FORMAT_R16G16B16A16_UINT 0x000000c9 -#define NV50_2D_SIFC_FORMAT_R16G16B16A16_FLOAT 0x000000ca -#define NV50_2D_SIFC_FORMAT_R32G32_FLOAT 0x000000cb -#define NV50_2D_SIFC_FORMAT_R32G32_SINT 0x000000cc -#define NV50_2D_SIFC_FORMAT_R32G32_UINT 0x000000cd -#define 
NV50_2D_SIFC_FORMAT_R16G16B16X16_FLOAT 0x000000ce -#define NV50_2D_SIFC_FORMAT_A8R8G8B8_UNORM 0x000000cf -#define NV50_2D_SIFC_FORMAT_A8R8G8B8_SRGB 0x000000d0 -#define NV50_2D_SIFC_FORMAT_A2B10G10R10_UNORM 0x000000d1 -#define NV50_2D_SIFC_FORMAT_A2B10G10R10_UINT 0x000000d2 -#define NV50_2D_SIFC_FORMAT_A8B8G8R8_UNORM 0x000000d5 -#define NV50_2D_SIFC_FORMAT_A8B8G8R8_SRGB 0x000000d6 -#define NV50_2D_SIFC_FORMAT_A8B8G8R8_SNORM 0x000000d7 -#define NV50_2D_SIFC_FORMAT_A8B8G8R8_SINT 0x000000d8 -#define NV50_2D_SIFC_FORMAT_A8B8G8R8_UINT 0x000000d9 -#define NV50_2D_SIFC_FORMAT_R16G16_UNORM 0x000000da -#define NV50_2D_SIFC_FORMAT_R16G16_SNORM 0x000000db -#define NV50_2D_SIFC_FORMAT_R16G16_SINT 0x000000dc -#define NV50_2D_SIFC_FORMAT_R16G16_UINT 0x000000dd -#define NV50_2D_SIFC_FORMAT_R16G16_FLOAT 0x000000de -#define NV50_2D_SIFC_FORMAT_A2R10G10B10_UNORM 0x000000df -#define NV50_2D_SIFC_FORMAT_B10G11R11_FLOAT 0x000000e0 -#define NV50_2D_SIFC_FORMAT_R32_FLOAT 0x000000e5 -#define NV50_2D_SIFC_FORMAT_X8R8G8B8_UNORM 0x000000e6 -#define NV50_2D_SIFC_FORMAT_X8R8G8B8_SRGB 0x000000e7 -#define NV50_2D_SIFC_FORMAT_R5G6B5_UNORM 0x000000e8 -#define NV50_2D_SIFC_FORMAT_A1R5G5B5_UNORM 0x000000e9 -#define NV50_2D_SIFC_FORMAT_R8G8_UNORM 0x000000ea -#define NV50_2D_SIFC_FORMAT_R8G8_SNORM 0x000000eb -#define NV50_2D_SIFC_FORMAT_R8G8_SINT 0x000000ec -#define NV50_2D_SIFC_FORMAT_R8G8_UINT 0x000000ed -#define NV50_2D_SIFC_FORMAT_R16_UNORM 0x000000ee -#define NV50_2D_SIFC_FORMAT_R16_SNORM 0x000000ef -#define NV50_2D_SIFC_FORMAT_R16_SINT 0x000000f0 -#define NV50_2D_SIFC_FORMAT_R16_UINT 0x000000f1 -#define NV50_2D_SIFC_FORMAT_R16_FLOAT 0x000000f2 -#define NV50_2D_SIFC_FORMAT_R8_UNORM 0x000000f3 -#define NV50_2D_SIFC_FORMAT_R8_SNORM 0x000000f4 -#define NV50_2D_SIFC_FORMAT_R8_SINT 0x000000f5 -#define NV50_2D_SIFC_FORMAT_R8_UINT 0x000000f6 -#define NV50_2D_SIFC_FORMAT_A8_UNORM 0x000000f7 -#define NV50_2D_SIFC_FORMAT_X1R5G5B5_UNORM 0x000000f8 -#define NV50_2D_SIFC_FORMAT_X8B8G8R8_UNORM 0x000000f9 
-#define NV50_2D_SIFC_FORMAT_X8B8G8R8_SRGB 0x000000fa -#define NV50_2D_SIFC_BITMAP_UNK808 0x00000808 -#define NV50_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c -#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810 -#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000 -#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001 -#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002 -#define NV50_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814 -#define NV50_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818 -#define NV50_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c -#define NV50_2D_SIFC_WIDTH 0x00000838 -#define NV50_2D_SIFC_HEIGHT 0x0000083c -#define NV50_2D_SIFC_DX_DU_FRACT 0x00000840 -#define NV50_2D_SIFC_DX_DU_INT 0x00000844 -#define NV50_2D_SIFC_DY_DV_FRACT 0x00000848 -#define NV50_2D_SIFC_DY_DV_INT 0x0000084c -#define NV50_2D_SIFC_DST_X_FRACT 0x00000850 -#define NV50_2D_SIFC_DST_X_INT 0x00000854 -#define NV50_2D_SIFC_DST_Y_FRACT 0x00000858 -#define NV50_2D_SIFC_DST_Y_INT 0x0000085c -#define NV50_2D_SIFC_DATA 0x00000860 -#define NV50_2D_BLIT_DST_X 0x000008b0 -#define NV50_2D_BLIT_DST_Y 0x000008b4 -#define NV50_2D_BLIT_DST_W 0x000008b8 -#define NV50_2D_BLIT_DST_H 0x000008bc -#define NV50_2D_BLIT_DU_DX_FRACT 0x000008c0 -#define NV50_2D_BLIT_DU_DX_INT 0x000008c4 -#define NV50_2D_BLIT_DV_DY_FRACT 0x000008c8 -#define NV50_2D_BLIT_DV_DY_INT 0x000008cc -#define NV50_2D_BLIT_SRC_X_FRACT 0x000008d0 -#define NV50_2D_BLIT_SRC_X_INT 0x000008d4 -#define NV50_2D_BLIT_SRC_Y_FRACT 0x000008d8 -#define NV50_2D_BLIT_SRC_Y_INT 0x000008dc - - -#define NV50TCL 0x00005097 - -#define NV50TCL_NOP 0x00000100 -#define NV50TCL_NOTIFY 0x00000104 -#define NV50TCL_SERIALIZE 0x00000110 -#define NV50TCL_DMA_NOTIFY 0x00000180 -#define NV50TCL_DMA_ZETA 0x00000184 -#define NV50TCL_DMA_QUERY 0x00000188 -#define NV50TCL_DMA_VTXBUF0 0x0000018c -#define NV50TCL_DMA_LOCAL 0x00000190 -#define NV50TCL_DMA_STACK 0x00000194 -#define NV50TCL_DMA_CODE_CB 0x00000198 -#define NV50TCL_DMA_TSC 0x0000019c -#define 
NV50TCL_DMA_TIC 0x000001a0 -#define NV50TCL_DMA_TEXTURE 0x000001a4 -#define NV50TCL_DMA_STRMOUT 0x000001a8 -#define NV50TCL_DMA_CLIPID 0x000001ac -#define NV50TCL_DMA_COLOR(x) (0x000001c0+((x)*4)) -#define NV50TCL_DMA_COLOR__SIZE 0x00000008 -#define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32)) -#define NV50TCL_RT_ADDRESS_HIGH__SIZE 0x00000008 -#define NV50TCL_RT_ADDRESS_LOW(x) (0x00000204+((x)*32)) -#define NV50TCL_RT_ADDRESS_LOW__SIZE 0x00000008 -#define NV50TCL_RT_FORMAT(x) (0x00000208+((x)*32)) -#define NV50TCL_RT_FORMAT__SIZE 0x00000008 -#define NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT 0x000000c0 -#define NV50TCL_RT_FORMAT_R32G32B32A32_SINT 0x000000c1 -#define NV50TCL_RT_FORMAT_R32G32B32A32_UINT 0x000000c2 -#define NV50TCL_RT_FORMAT_R32G32B32X32_FLOAT 0x000000c3 -#define NV50TCL_RT_FORMAT_R16G16B16A16_UNORM 0x000000c6 -#define NV50TCL_RT_FORMAT_R16G16B16A16_SNORM 0x000000c7 -#define NV50TCL_RT_FORMAT_R16G16B16A16_SINT 0x000000c8 -#define NV50TCL_RT_FORMAT_R16G16B16A16_UINT 0x000000c9 -#define NV50TCL_RT_FORMAT_R16G16B16A16_FLOAT 0x000000ca -#define NV50TCL_RT_FORMAT_R32G32_FLOAT 0x000000cb -#define NV50TCL_RT_FORMAT_R32G32_SINT 0x000000cc -#define NV50TCL_RT_FORMAT_R32G32_UINT 0x000000cd -#define NV50TCL_RT_FORMAT_R16G16B16X16_FLOAT 0x000000ce -#define NV50TCL_RT_FORMAT_A8R8G8B8_UNORM 0x000000cf -#define NV50TCL_RT_FORMAT_A8R8G8B8_SRGB 0x000000d0 -#define NV50TCL_RT_FORMAT_A2B10G10R10_UNORM 0x000000d1 -#define NV50TCL_RT_FORMAT_A2B10G10R10_UINT 0x000000d2 -#define NV50TCL_RT_FORMAT_A8B8G8R8_UNORM 0x000000d5 -#define NV50TCL_RT_FORMAT_A8B8G8R8_SRGB 0x000000d6 -#define NV50TCL_RT_FORMAT_A8B8G8R8_SNORM 0x000000d7 -#define NV50TCL_RT_FORMAT_A8B8G8R8_SINT 0x000000d8 -#define NV50TCL_RT_FORMAT_A8B8G8R8_UINT 0x000000d9 -#define NV50TCL_RT_FORMAT_R16G16_UNORM 0x000000da -#define NV50TCL_RT_FORMAT_R16G16_SNORM 0x000000db -#define NV50TCL_RT_FORMAT_R16G16_SINT 0x000000dc -#define NV50TCL_RT_FORMAT_R16G16_UINT 0x000000dd -#define NV50TCL_RT_FORMAT_R16G16_FLOAT 0x000000de 
-#define NV50TCL_RT_FORMAT_A2R10G10B10_UNORM 0x000000df -#define NV50TCL_RT_FORMAT_B10G11R11_FLOAT 0x000000e0 -#define NV50TCL_RT_FORMAT_R32_FLOAT 0x000000e5 -#define NV50TCL_RT_FORMAT_X8R8G8B8_UNORM 0x000000e6 -#define NV50TCL_RT_FORMAT_X8R8G8B8_SRGB 0x000000e7 -#define NV50TCL_RT_FORMAT_R5G6B5_UNORM 0x000000e8 -#define NV50TCL_RT_FORMAT_A1R5G5B5_UNORM 0x000000e9 -#define NV50TCL_RT_FORMAT_R8G8_UNORM 0x000000ea -#define NV50TCL_RT_FORMAT_R8G8_SNORM 0x000000eb -#define NV50TCL_RT_FORMAT_R8G8_SINT 0x000000ec -#define NV50TCL_RT_FORMAT_R8G8_UINT 0x000000ed -#define NV50TCL_RT_FORMAT_R16_UNORM 0x000000ee -#define NV50TCL_RT_FORMAT_R16_SNORM 0x000000ef -#define NV50TCL_RT_FORMAT_R16_SINT 0x000000f0 -#define NV50TCL_RT_FORMAT_R16_UINT 0x000000f1 -#define NV50TCL_RT_FORMAT_R16_FLOAT 0x000000f2 -#define NV50TCL_RT_FORMAT_R8_UNORM 0x000000f3 -#define NV50TCL_RT_FORMAT_R8_SNORM 0x000000f4 -#define NV50TCL_RT_FORMAT_R8_SINT 0x000000f5 -#define NV50TCL_RT_FORMAT_R8_UINT 0x000000f6 -#define NV50TCL_RT_FORMAT_A8_UNORM 0x000000f7 -#define NV50TCL_RT_FORMAT_X1R5G5B5_UNORM 0x000000f8 -#define NV50TCL_RT_FORMAT_X8B8G8R8_UNORM 0x000000f9 -#define NV50TCL_RT_FORMAT_X8B8G8R8_SRGB 0x000000fa -#define NV50TCL_RT_TILE_MODE(x) (0x0000020c+((x)*32)) -#define NV50TCL_RT_TILE_MODE__SIZE 0x00000008 -#define NV50TCL_RT_LAYER_STRIDE(x) (0x00000210+((x)*32)) -#define NV50TCL_RT_LAYER_STRIDE__SIZE 0x00000008 -#define NV50TCL_VTX_ATTR_1F(x) (0x00000300+((x)*4)) -#define NV50TCL_VTX_ATTR_1F__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_2H(x) (0x00000340+((x)*4)) -#define NV50TCL_VTX_ATTR_2H__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_2H_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_2H_X_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_2H_Y_SHIFT 16 -#define NV50TCL_VTX_ATTR_2H_Y_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_2F_X(x) (0x00000380+((x)*8)) -#define NV50TCL_VTX_ATTR_2F_X__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_2F_Y(x) (0x00000384+((x)*8)) -#define NV50TCL_VTX_ATTR_2F_Y__SIZE 0x00000010 -#define 
NV50TCL_VTX_ATTR_3F_X(x) (0x00000400+((x)*16)) -#define NV50TCL_VTX_ATTR_3F_X__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_3F_Y(x) (0x00000404+((x)*16)) -#define NV50TCL_VTX_ATTR_3F_Y__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_3F_Z(x) (0x00000408+((x)*16)) -#define NV50TCL_VTX_ATTR_3F_Z__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4F_X(x) (0x00000500+((x)*16)) -#define NV50TCL_VTX_ATTR_4F_X__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4F_Y(x) (0x00000504+((x)*16)) -#define NV50TCL_VTX_ATTR_4F_Y__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4F_Z(x) (0x00000508+((x)*16)) -#define NV50TCL_VTX_ATTR_4F_Z__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4F_W(x) (0x0000050c+((x)*16)) -#define NV50TCL_VTX_ATTR_4F_W__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4H_0(x) (0x00000600+((x)*8)) -#define NV50TCL_VTX_ATTR_4H_0__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4H_0_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_4H_0_X_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_4H_0_Y_SHIFT 16 -#define NV50TCL_VTX_ATTR_4H_0_Y_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_4H_1(x) (0x00000604+((x)*8)) -#define NV50TCL_VTX_ATTR_4H_1__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4H_1_Z_SHIFT 0 -#define NV50TCL_VTX_ATTR_4H_1_Z_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_4H_1_W_SHIFT 16 -#define NV50TCL_VTX_ATTR_4H_1_W_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_2I(x) (0x00000680+((x)*4)) -#define NV50TCL_VTX_ATTR_2I__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_2I_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_2I_X_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_2I_Y_SHIFT 16 -#define NV50TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_2NI(x) (0x000006c0+((x)*4)) -#define NV50TCL_VTX_ATTR_2NI__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_2NI_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_2NI_X_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_2NI_Y_SHIFT 16 -#define NV50TCL_VTX_ATTR_2NI_Y_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_4I_0(x) (0x00000700+((x)*8)) -#define NV50TCL_VTX_ATTR_4I_0__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4I_0_X_SHIFT 0 -#define 
NV50TCL_VTX_ATTR_4I_0_X_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_4I_0_Y_SHIFT 16 -#define NV50TCL_VTX_ATTR_4I_0_Y_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_4I_1(x) (0x00000704+((x)*8)) -#define NV50TCL_VTX_ATTR_4I_1__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4I_1_Z_SHIFT 0 -#define NV50TCL_VTX_ATTR_4I_1_Z_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_4I_1_W_SHIFT 16 -#define NV50TCL_VTX_ATTR_4I_1_W_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_4NI_0(x) (0x00000780+((x)*8)) -#define NV50TCL_VTX_ATTR_4NI_0__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4NI_0_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_4NI_0_X_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_4NI_0_Y_SHIFT 16 -#define NV50TCL_VTX_ATTR_4NI_0_Y_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_4NI_1(x) (0x00000784+((x)*8)) -#define NV50TCL_VTX_ATTR_4NI_1__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4NI_1_Z_SHIFT 0 -#define NV50TCL_VTX_ATTR_4NI_1_Z_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_4NI_1_W_SHIFT 16 -#define NV50TCL_VTX_ATTR_4NI_1_W_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_4UB(x) (0x00000800+((x)*4)) -#define NV50TCL_VTX_ATTR_4UB__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4UB_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_4UB_X_MASK 0x000000ff -#define NV50TCL_VTX_ATTR_4UB_Y_SHIFT 8 -#define NV50TCL_VTX_ATTR_4UB_Y_MASK 0x0000ff00 -#define NV50TCL_VTX_ATTR_4UB_Z_SHIFT 16 -#define NV50TCL_VTX_ATTR_4UB_Z_MASK 0x00ff0000 -#define NV50TCL_VTX_ATTR_4UB_W_SHIFT 24 -#define NV50TCL_VTX_ATTR_4UB_W_MASK 0xff000000 -#define NV50TCL_VTX_ATTR_4B(x) (0x00000840+((x)*4)) -#define NV50TCL_VTX_ATTR_4B__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4B_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_4B_X_MASK 0x000000ff -#define NV50TCL_VTX_ATTR_4B_Y_SHIFT 8 -#define NV50TCL_VTX_ATTR_4B_Y_MASK 0x0000ff00 -#define NV50TCL_VTX_ATTR_4B_Z_SHIFT 16 -#define NV50TCL_VTX_ATTR_4B_Z_MASK 0x00ff0000 -#define NV50TCL_VTX_ATTR_4B_W_SHIFT 24 -#define NV50TCL_VTX_ATTR_4B_W_MASK 0xff000000 -#define NV50TCL_VTX_ATTR_4NUB(x) (0x00000880+((x)*4)) -#define NV50TCL_VTX_ATTR_4NUB__SIZE 0x00000010 
-#define NV50TCL_VTX_ATTR_4NUB_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_4NUB_X_MASK 0x000000ff -#define NV50TCL_VTX_ATTR_4NUB_Y_SHIFT 8 -#define NV50TCL_VTX_ATTR_4NUB_Y_MASK 0x0000ff00 -#define NV50TCL_VTX_ATTR_4NUB_Z_SHIFT 16 -#define NV50TCL_VTX_ATTR_4NUB_Z_MASK 0x00ff0000 -#define NV50TCL_VTX_ATTR_4NUB_W_SHIFT 24 -#define NV50TCL_VTX_ATTR_4NUB_W_MASK 0xff000000 -#define NV50TCL_VTX_ATTR_4NB(x) (0x000008c0+((x)*4)) -#define NV50TCL_VTX_ATTR_4NB__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4NB_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_4NB_X_MASK 0x000000ff -#define NV50TCL_VTX_ATTR_4NB_Y_SHIFT 8 -#define NV50TCL_VTX_ATTR_4NB_Y_MASK 0x0000ff00 -#define NV50TCL_VTX_ATTR_4NB_Z_SHIFT 16 -#define NV50TCL_VTX_ATTR_4NB_Z_MASK 0x00ff0000 -#define NV50TCL_VTX_ATTR_4NB_W_SHIFT 24 -#define NV50TCL_VTX_ATTR_4NB_W_MASK 0xff000000 -#define NV50TCL_VERTEX_ARRAY_FORMAT(x) (0x00000900+((x)*16)) -#define NV50TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010 -#define NV50TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT 0 -#define NV50TCL_VERTEX_ARRAY_FORMAT_STRIDE_MASK 0x00000fff -#define NV50TCL_VERTEX_ARRAY_FORMAT_ENABLE (1 << 29) -#define NV50TCL_VERTEX_ARRAY_START_HIGH(x) (0x00000904+((x)*16)) -#define NV50TCL_VERTEX_ARRAY_START_HIGH__SIZE 0x00000010 -#define NV50TCL_VERTEX_ARRAY_START_LOW(x) (0x00000908+((x)*16)) -#define NV50TCL_VERTEX_ARRAY_START_LOW__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_SCALE_X(x) (0x00000a00+((x)*32)) -#define NV50TCL_VIEWPORT_SCALE_X__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_SCALE_Y(x) (0x00000a04+((x)*32)) -#define NV50TCL_VIEWPORT_SCALE_Y__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_SCALE_Z(x) (0x00000a08+((x)*32)) -#define NV50TCL_VIEWPORT_SCALE_Z__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_TRANSLATE_X(x) (0x00000a0c+((x)*32)) -#define NV50TCL_VIEWPORT_TRANSLATE_X__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_TRANSLATE_Y(x) (0x00000a10+((x)*32)) -#define NV50TCL_VIEWPORT_TRANSLATE_Y__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_TRANSLATE_Z(x) (0x00000a14+((x)*32)) -#define 
NV50TCL_VIEWPORT_TRANSLATE_Z__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_HORIZ(x) (0x00000c00+((x)*16)) -#define NV50TCL_VIEWPORT_HORIZ__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_HORIZ_X_SHIFT 0 -#define NV50TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff -#define NV50TCL_VIEWPORT_HORIZ_W_SHIFT 16 -#define NV50TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 -#define NV50TCL_VIEWPORT_VERT(x) (0x00000c04+((x)*16)) -#define NV50TCL_VIEWPORT_VERT__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_VERT_Y_SHIFT 0 -#define NV50TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff -#define NV50TCL_VIEWPORT_VERT_H_SHIFT 16 -#define NV50TCL_VIEWPORT_VERT_H_MASK 0xffff0000 -#define NV50TCL_DEPTH_RANGE_NEAR(x) (0x00000c08+((x)*16)) -#define NV50TCL_DEPTH_RANGE_NEAR__SIZE 0x00000010 -#define NV50TCL_DEPTH_RANGE_FAR(x) (0x00000c0c+((x)*16)) -#define NV50TCL_DEPTH_RANGE_FAR__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8)) -#define NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 -#define NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_SHIFT 0 -#define NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_MASK 0x0000ffff -#define NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_SHIFT 16 -#define NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_MASK 0xffff0000 -#define NV50TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8)) -#define NV50TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 -#define NV50TCL_VIEWPORT_CLIP_VERT_MIN_SHIFT 0 -#define NV50TCL_VIEWPORT_CLIP_VERT_MIN_MASK 0x0000ffff -#define NV50TCL_VIEWPORT_CLIP_VERT_MAX_SHIFT 16 -#define NV50TCL_VIEWPORT_CLIP_VERT_MAX_MASK 0xffff0000 -#define NV50TCL_CLIPID_REGION_HORIZ(x) (0x00000d40+((x)*8)) -#define NV50TCL_CLIPID_REGION_HORIZ__SIZE 0x00000004 -#define NV50TCL_CLIPID_REGION_VERT(x) (0x00000d44+((x)*8)) -#define NV50TCL_CLIPID_REGION_VERT__SIZE 0x00000004 -#define NV50TCL_VERTEX_BUFFER_FIRST 0x00000d74 -#define NV50TCL_VERTEX_BUFFER_COUNT 0x00000d78 -#define NV50TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4)) -#define NV50TCL_CLEAR_COLOR__SIZE 0x00000004 -#define NV50TCL_CLEAR_DEPTH 0x00000d90 -#define NV50TCL_STACK_ADDRESS_HIGH 0x00000d94 
-#define NV50TCL_STACK_ADDRESS_LOW 0x00000d98 -#define NV50TCL_STACK_SIZE_LOG 0x00000d9c -#define NV50TCL_CLEAR_STENCIL 0x00000da0 -#define NV50TCL_STRMOUT_PRIMITIVE_COUNT 0x00000da8 -#define NV50TCL_POLYGON_MODE_FRONT 0x00000dac -#define NV50TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 -#define NV50TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 -#define NV50TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 -#define NV50TCL_POLYGON_MODE_BACK 0x00000db0 -#define NV50TCL_POLYGON_MODE_BACK_POINT 0x00001b00 -#define NV50TCL_POLYGON_MODE_BACK_LINE 0x00001b01 -#define NV50TCL_POLYGON_MODE_BACK_FILL 0x00001b02 -#define NV50TCL_POLYGON_SMOOTH_ENABLE 0x00000db4 -#define NV50TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 -#define NV50TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 -#define NV50TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 -#define NV50TCL_WATCHDOG_TIMER 0x00000de4 -#define NV50TCL_WINDOW_OFFSET_X 0x00000df8 -#define NV50TCL_WINDOW_OFFSET_Y 0x00000dfc -#define NV50TCL_SCISSOR_ENABLE(x) (0x00000e00+((x)*16)) -#define NV50TCL_SCISSOR_ENABLE__SIZE 0x00000010 -#define NV50TCL_SCISSOR_HORIZ(x) (0x00000e04+((x)*16)) -#define NV50TCL_SCISSOR_HORIZ__SIZE 0x00000010 -#define NV50TCL_SCISSOR_HORIZ_MIN_SHIFT 0 -#define NV50TCL_SCISSOR_HORIZ_MIN_MASK 0x0000ffff -#define NV50TCL_SCISSOR_HORIZ_MAX_SHIFT 16 -#define NV50TCL_SCISSOR_HORIZ_MAX_MASK 0xffff0000 -#define NV50TCL_SCISSOR_VERT(x) (0x00000e08+((x)*16)) -#define NV50TCL_SCISSOR_VERT__SIZE 0x00000010 -#define NV50TCL_SCISSOR_VERT_MIN_SHIFT 0 -#define NV50TCL_SCISSOR_VERT_MIN_MASK 0x0000ffff -#define NV50TCL_SCISSOR_VERT_MAX_SHIFT 16 -#define NV50TCL_SCISSOR_VERT_MAX_MASK 0xffff0000 -#define NV50TCL_CB_ADDR 0x00000f00 -#define NV50TCL_CB_ADDR_ID_SHIFT 8 -#define NV50TCL_CB_ADDR_ID_MASK 0x003fff00 -#define NV50TCL_CB_ADDR_BUFFER_SHIFT 0 -#define NV50TCL_CB_ADDR_BUFFER_MASK 0x0000007f -#define NV50TCL_CB_DATA(x) (0x00000f04+((x)*4)) -#define NV50TCL_CB_DATA__SIZE 0x00000010 -#define NV50TCL_LOCAL_WARPS_LOG_ALLOC 0x00000f44 -#define 
NV50TCL_LOCAL_WARPS_NO_CLAMP 0x00000f48 -#define NV50TCL_STACK_WARPS_LOG_ALLOC 0x00000f4c -#define NV50TCL_STACK_WARPS_NO_CLAMP 0x00000f50 -#define NV50TCL_STENCIL_BACK_FUNC_REF 0x00000f54 -#define NV50TCL_STENCIL_BACK_MASK 0x00000f58 -#define NV50TCL_STENCIL_BACK_FUNC_MASK 0x00000f5c -#define NV50TCL_GP_ADDRESS_HIGH 0x00000f70 -#define NV50TCL_GP_ADDRESS_LOW 0x00000f74 -#define NV50TCL_VP_ADDRESS_HIGH 0x00000f7c -#define NV50TCL_VP_ADDRESS_LOW 0x00000f80 -#define NV50TCL_VERTEX_RUNOUT_HIGH 0x00000f84 -#define NV50TCL_VERTEX_RUNOUT_LOW 0x00000f88 -#define NV50TCL_DEPTH_BOUNDS(x) (0x00000f9c+((x)*4)) -#define NV50TCL_DEPTH_BOUNDS__SIZE 0x00000002 -#define NV50TCL_FP_ADDRESS_HIGH 0x00000fa4 -#define NV50TCL_FP_ADDRESS_LOW 0x00000fa8 -#define NV50TCL_MSAA_MASK(x) (0x00000fbc+((x)*4)) -#define NV50TCL_MSAA_MASK__SIZE 0x00000004 -#define NV50TCL_CLIPID_ADDRESS_HIGH 0x00000fcc -#define NV50TCL_CLIPID_ADDRESS_LOW 0x00000fd0 -#define NV50TCL_ZETA_ADDRESS_HIGH 0x00000fe0 -#define NV50TCL_ZETA_ADDRESS_LOW 0x00000fe4 -#define NV50TCL_ZETA_FORMAT 0x00000fe8 -#define NV50TCL_ZETA_FORMAT_Z32_FLOAT 0x0000000a -#define NV50TCL_ZETA_FORMAT_Z16_UNORM 0x00000013 -#define NV50TCL_ZETA_FORMAT_Z24S8_UNORM 0x00000014 -#define NV50TCL_ZETA_FORMAT_X8Z24_UNORM 0x00000015 -#define NV50TCL_ZETA_FORMAT_S8Z24_UNORM 0x00000016 -#define NV50TCL_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019 -#define NV50TCL_ZETA_TILE_MODE 0x00000fec -#define NV50TCL_ZETA_LAYER_STRIDE 0x00000ff0 -#define NV50TCL_SCREEN_SCISSOR_HORIZ 0x00000ff4 -#define NV50TCL_SCREEN_SCISSOR_HORIZ_W_SHIFT 16 -#define NV50TCL_SCREEN_SCISSOR_HORIZ_W_MASK 0xffff0000 -#define NV50TCL_SCREEN_SCISSOR_HORIZ_X_SHIFT 0 -#define NV50TCL_SCREEN_SCISSOR_HORIZ_X_MASK 0x0000ffff -#define NV50TCL_SCREEN_SCISSOR_VERT 0x00000ff8 -#define NV50TCL_SCREEN_SCISSOR_VERT_H_SHIFT 16 -#define NV50TCL_SCREEN_SCISSOR_VERT_H_MASK 0xffff0000 -#define NV50TCL_SCREEN_SCISSOR_VERT_Y_SHIFT 0 -#define NV50TCL_SCREEN_SCISSOR_VERT_Y_MASK 0x0000ffff -#define 
NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(x) (0x00001080+((x)*8)) -#define NV50TCL_VERTEX_ARRAY_LIMIT_HIGH__SIZE 0x00000010 -#define NV50TCL_VERTEX_ARRAY_LIMIT_LOW(x) (0x00001084+((x)*8)) -#define NV50TCL_VERTEX_ARRAY_LIMIT_LOW__SIZE 0x00000010 -#define NV50TCL_RT_CONTROL 0x0000121c -#define NV50TCL_RT_CONTROL_COUNT_SHIFT 0 -#define NV50TCL_RT_CONTROL_COUNT_MASK 0x0000000f -#define NV50TCL_RT_CONTROL_MAP0_SHIFT 4 -#define NV50TCL_RT_CONTROL_MAP0_MASK 0x00000070 -#define NV50TCL_RT_CONTROL_MAP1_SHIFT 7 -#define NV50TCL_RT_CONTROL_MAP1_MASK 0x00000380 -#define NV50TCL_RT_CONTROL_MAP2_SHIFT 10 -#define NV50TCL_RT_CONTROL_MAP2_MASK 0x00001c00 -#define NV50TCL_RT_CONTROL_MAP3_SHIFT 13 -#define NV50TCL_RT_CONTROL_MAP3_MASK 0x0000e000 -#define NV50TCL_RT_CONTROL_MAP4_SHIFT 16 -#define NV50TCL_RT_CONTROL_MAP4_MASK 0x00070000 -#define NV50TCL_RT_CONTROL_MAP5_SHIFT 19 -#define NV50TCL_RT_CONTROL_MAP5_MASK 0x00380000 -#define NV50TCL_RT_CONTROL_MAP6_SHIFT 22 -#define NV50TCL_RT_CONTROL_MAP6_MASK 0x01c00000 -#define NV50TCL_RT_CONTROL_MAP7_SHIFT 25 -#define NV50TCL_RT_CONTROL_MAP7_MASK 0x0e000000 -#define NV50TCL_RT_ARRAY_MODE 0x00001224 -#define NV50TCL_RT_ARRAY_MODE_LAYERS_SHIFT 0 -#define NV50TCL_RT_ARRAY_MODE_LAYERS_MASK 0x0000ffff -#define NV50TCL_RT_ARRAY_MODE_VOLUME (1 << 16) -#define NV50TCL_ZETA_HORIZ 0x00001228 -#define NV50TCL_ZETA_VERT 0x0000122c -#define NV50TCL_ZETA_ARRAY_MODE 0x00001230 -#define NV50TCL_ZETA_ARRAY_MODE_LAYERS_SHIFT 0 -#define NV50TCL_ZETA_ARRAY_MODE_LAYERS_MASK 0x0000ffff -#define NV50TCL_ZETA_ARRAY_MODE_UNK (1 << 16) -#define NV50TCL_LINKED_TSC 0x00001234 -#define NV50TCL_RT_HORIZ(x) (0x00001240+((x)*8)) -#define NV50TCL_RT_HORIZ__SIZE 0x00000008 -#define NV50TCL_RT_VERT(x) (0x00001244+((x)*8)) -#define NV50TCL_RT_VERT__SIZE 0x00000008 -#define NV50TCL_CB_DEF_ADDRESS_HIGH 0x00001280 -#define NV50TCL_CB_DEF_ADDRESS_LOW 0x00001284 -#define NV50TCL_CB_DEF_SET 0x00001288 -#define NV50TCL_CB_DEF_SET_SIZE_SHIFT 0 -#define NV50TCL_CB_DEF_SET_SIZE_MASK 
0x0000ffff -#define NV50TCL_CB_DEF_SET_BUFFER_SHIFT 16 -#define NV50TCL_CB_DEF_SET_BUFFER_MASK 0x007f0000 -#define NV50TCL_STRMOUT_BUFFERS_CTRL 0x00001294 -#define NV50TCL_STRMOUT_BUFFERS_CTRL_INTERLEAVED (1 << 0) -#define NV50TCL_STRMOUT_BUFFERS_CTRL_SEPARATE_SHIFT 4 -#define NV50TCL_STRMOUT_BUFFERS_CTRL_SEPARATE_MASK 0x000000f0 -#define NV50TCL_STRMOUT_BUFFERS_CTRL_STRIDE_SHIFT 8 -#define NV50TCL_STRMOUT_BUFFERS_CTRL_STRIDE_MASK 0x0000ff00 -#define NV50TCL_FP_RESULT_COUNT 0x00001298 -#define NV50TCL_DEPTH_TEST_ENABLE 0x000012cc -#define NV50TCL_SHADE_MODEL 0x000012d4 -#define NV50TCL_SHADE_MODEL_FLAT 0x00001d00 -#define NV50TCL_SHADE_MODEL_SMOOTH 0x00001d01 -#define NV50TCL_LOCAL_ADDRESS_HIGH 0x000012d8 -#define NV50TCL_LOCAL_ADDRESS_LOW 0x000012dc -#define NV50TCL_LOCAL_SIZE_LOG 0x000012e0 -#define NV50TCL_DEPTH_WRITE_ENABLE 0x000012e8 -#define NV50TCL_ALPHA_TEST_ENABLE 0x000012ec -#define NV50TCL_PM_SET(x) (0x000012f0+((x)*4)) -#define NV50TCL_PM_SET__SIZE 0x00000004 -#define NV50TCL_VB_ELEMENT_U8_SETUP 0x00001300 -#define NV50TCL_VB_ELEMENT_U8_SETUP_OFFSET_SHIFT 30 -#define NV50TCL_VB_ELEMENT_U8_SETUP_OFFSET_MASK 0xc0000000 -#define NV50TCL_VB_ELEMENT_U8_SETUP_COUNT_SHIFT 0 -#define NV50TCL_VB_ELEMENT_U8_SETUP_COUNT_MASK 0x3fffffff -#define NV50TCL_VB_ELEMENT_U8 0x00001304 -#define NV50TCL_VB_ELEMENT_U8_I0_SHIFT 0 -#define NV50TCL_VB_ELEMENT_U8_I0_MASK 0x000000ff -#define NV50TCL_VB_ELEMENT_U8_I1_SHIFT 8 -#define NV50TCL_VB_ELEMENT_U8_I1_MASK 0x0000ff00 -#define NV50TCL_VB_ELEMENT_U8_I2_SHIFT 16 -#define NV50TCL_VB_ELEMENT_U8_I2_MASK 0x00ff0000 -#define NV50TCL_VB_ELEMENT_U8_I3_SHIFT 24 -#define NV50TCL_VB_ELEMENT_U8_I3_MASK 0xff000000 -#define NV50TCL_DEPTH_TEST_FUNC 0x0000130c -#define NV50TCL_DEPTH_TEST_FUNC_NEVER 0x00000200 -#define NV50TCL_DEPTH_TEST_FUNC_LESS 0x00000201 -#define NV50TCL_DEPTH_TEST_FUNC_EQUAL 0x00000202 -#define NV50TCL_DEPTH_TEST_FUNC_LEQUAL 0x00000203 -#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204 -#define 
NV50TCL_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 -#define NV50TCL_DEPTH_TEST_FUNC_GEQUAL 0x00000206 -#define NV50TCL_DEPTH_TEST_FUNC_ALWAYS 0x00000207 -#define NV50TCL_ALPHA_TEST_REF 0x00001310 -#define NV50TCL_ALPHA_TEST_FUNC 0x00001314 -#define NV50TCL_ALPHA_TEST_FUNC_NEVER 0x00000200 -#define NV50TCL_ALPHA_TEST_FUNC_LESS 0x00000201 -#define NV50TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202 -#define NV50TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203 -#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 -#define NV50TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 -#define NV50TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206 -#define NV50TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207 -#define NV50TCL_BLEND_COLOR(x) (0x0000131c+((x)*4)) -#define NV50TCL_BLEND_COLOR__SIZE 0x00000004 -#define NV50TCL_TIC_FLUSH 0x00001330 -#define NV50TCL_TSC_FLUSH 0x00001334 -#define NV50TCL_TEX_CACHE_CTL 0x00001338 -#define NV50TCL_BLEND_EQUATION_RGB 0x00001340 -#define NV50TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 -#define NV50TCL_BLEND_EQUATION_RGB_MIN 0x00008007 -#define NV50TCL_BLEND_EQUATION_RGB_MAX 0x00008008 -#define NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a -#define NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b -#define NV50TCL_BLEND_FUNC_SRC_RGB 0x00001344 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00004000 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00004001 -#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00004300 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00004301 -#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00004302 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00004303 -#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00004304 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00004305 -#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00004306 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00004307 -#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00004308 -#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x0000c001 -#define 
NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002 -#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x0000c003 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 -#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR 0x0000c900 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901 -#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA 0x0000c902 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903 -#define NV50TCL_BLEND_FUNC_DST_RGB 0x00001348 -#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00004000 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00004001 -#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00004300 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00004301 -#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00004302 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00004303 -#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00004304 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00004305 -#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00004306 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00004307 -#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00004308 -#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x0000c001 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002 -#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x0000c003 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 -#define NV50TCL_BLEND_FUNC_DST_RGB_SRC1_COLOR 0x0000c900 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901 -#define NV50TCL_BLEND_FUNC_DST_RGB_SRC1_ALPHA 0x0000c902 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903 -#define NV50TCL_BLEND_EQUATION_ALPHA 0x0000134c -#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 -#define NV50TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007 -#define NV50TCL_BLEND_EQUATION_ALPHA_MAX 0x00008008 -#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a -#define 
NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b -#define NV50TCL_BLEND_FUNC_SRC_ALPHA 0x00001350 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00004000 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00004001 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00004300 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00004302 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00004304 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00004306 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00004307 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00004308 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x0000c001 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x0000c003 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_COLOR 0x0000c900 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_ALPHA 0x0000c902 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903 -#define NV50TCL_BLEND_FUNC_DST_ALPHA 0x00001358 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00004000 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00004001 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00004300 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00004302 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00004304 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00004306 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 
0x00004307 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00004308 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x0000c001 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x0000c003 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_COLOR 0x0000c900 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_ALPHA 0x0000c902 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903 -#define NV50TCL_BLEND_ENABLE(x) (0x00001360+((x)*4)) -#define NV50TCL_BLEND_ENABLE__SIZE 0x00000008 -#define NV50TCL_STENCIL_FRONT_ENABLE 0x00001380 -#define NV50TCL_STENCIL_FRONT_OP_FAIL 0x00001384 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a -#define NV50TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL 0x00001388 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS 0x0000138c -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_KEEP 
0x00001e00 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC 0x00001390 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 -#define NV50TCL_STENCIL_FRONT_FUNC_REF 0x00001394 -#define NV50TCL_STENCIL_FRONT_MASK 0x00001398 -#define NV50TCL_STENCIL_FRONT_FUNC_MASK 0x0000139c -#define NV50TCL_FRAG_COLOR_CLAMP_EN 0x000013a8 -#define NV50TCL_Y_ORIGIN_BOTTOM 0x000013ac -#define NV50TCL_LINE_WIDTH 0x000013b0 -#define NV50TCL_TEX_LIMITS(x) (0x000013b4+((x)*4)) -#define NV50TCL_TEX_LIMITS__SIZE 0x00000003 -#define NV50TCL_TEX_LIMITS_SAMPLERS_LOG2_SHIFT 0 -#define NV50TCL_TEX_LIMITS_SAMPLERS_LOG2_MASK 0x0000000f -#define NV50TCL_TEX_LIMITS_TEXTURES_LOG2_SHIFT 4 -#define NV50TCL_TEX_LIMITS_TEXTURES_LOG2_MASK 0x000000f0 -#define NV50TCL_POINT_COORD_REPLACE_MAP(x) (0x000013c0+((x)*4)) -#define NV50TCL_POINT_COORD_REPLACE_MAP__SIZE 0x00000008 -#define NV50TCL_VP_START_ID 0x0000140c -#define NV50TCL_GP_START_ID 0x00001410 -#define NV50TCL_FP_START_ID 0x00001414 -#define NV50TCL_GP_VERTEX_OUTPUT_COUNT 0x00001420 -#define NV50TCL_VB_ELEMENT_BASE 0x00001434 -#define NV50TCL_CLEAR_FLAGS 0x0000143c -#define NV50TCL_CLEAR_FLAGS_OGL (1 << 0) -#define NV50TCL_CLEAR_FLAGS_D3D (1 << 4) -#define NV50TCL_INSTANCE_BASE 0x00001438 -#define NV50TCL_CODE_CB_FLUSH 0x00001440 -#define NV50TCL_BIND_TSC(x) 
(0x00001444+((x)*8)) -#define NV50TCL_BIND_TSC__SIZE 0x00000003 -#define NV50TCL_BIND_TSC_VALID (1 << 0) -#define NV50TCL_BIND_TSC_SAMPLER_SHIFT 4 -#define NV50TCL_BIND_TSC_SAMPLER_MASK 0x000000f0 -#define NV50TCL_BIND_TSC_TSC_SHIFT 12 -#define NV50TCL_BIND_TSC_TSC_MASK 0x001ff000 -#define NV50TCL_BIND_TIC(x) (0x00001448+((x)*8)) -#define NV50TCL_BIND_TIC__SIZE 0x00000003 -#define NV50TCL_BIND_TIC_VALID (1 << 0) -#define NV50TCL_BIND_TIC_TEXTURE_SHIFT 1 -#define NV50TCL_BIND_TIC_TEXTURE_MASK 0x000001fe -#define NV50TCL_BIND_TIC_TIC_SHIFT 9 -#define NV50TCL_BIND_TIC_TIC_MASK 0x7ffffe00 -#define NV50TCL_STRMOUT_MAP(x) (0x00001480+((x)*4)) -#define NV50TCL_STRMOUT_MAP__SIZE 0x00000020 -#define NV50TCL_CLIPID_HEIGHT 0x00001504 -#define NV50TCL_VP_CLIP_DISTANCE_ENABLE 0x00001510 -#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_0 (1 << 0) -#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_1 (1 << 1) -#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_2 (1 << 2) -#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_3 (1 << 3) -#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_4 (1 << 4) -#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_5 (1 << 5) -#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_6 (1 << 6) -#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_7 (1 << 7) -#define NV50TCL_SAMPLECNT_ENABLE 0x00001514 -#define NV50TCL_POINT_SIZE 0x00001518 -#define NV50TCL_POINT_SPRITE_ENABLE 0x00001520 -#define NV50TCL_SAMPLECNT_RESET 0x00001530 -#define NV50TCL_ZETA_ENABLE 0x00001538 -#define NV50TCL_MULTISAMPLE_CTRL 0x0000153c -#define NV50TCL_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE (1 << 0) -#define NV50TCL_MULTISAMPLE_CTRL_ALPHA_TO_ONE (1 << 4) -#define NV50TCL_NOPERSPECTIVE_BITMAP(x) (0x00001540+((x)*4)) -#define NV50TCL_NOPERSPECTIVE_BITMAP__SIZE 0x00000004 -#define NV50TCL_COND_ADDRESS_HIGH 0x00001550 -#define NV50TCL_COND_ADDRESS_LOW 0x00001554 -#define NV50TCL_COND_MODE 0x00001558 -#define NV50TCL_COND_MODE_NEVER 0x00000000 -#define NV50TCL_COND_MODE_ALWAYS 0x00000001 -#define NV50TCL_COND_MODE_RES 0x00000002 -#define 
NV50TCL_COND_MODE_NOT_RES_AND_NOT_ID 0x00000003 -#define NV50TCL_COND_MODE_RES_OR_ID 0x00000004 -#define NV50TCL_TSC_ADDRESS_HIGH 0x0000155c -#define NV50TCL_TSC_ADDRESS_LOW 0x00001560 -#define NV50TCL_TSC_LIMIT 0x00001564 -#define NV50TCL_POLYGON_OFFSET_FACTOR 0x0000156c -#define NV50TCL_LINE_SMOOTH_ENABLE 0x00001570 -#define NV50TCL_TIC_ADDRESS_HIGH 0x00001574 -#define NV50TCL_TIC_ADDRESS_LOW 0x00001578 -#define NV50TCL_TIC_LIMIT 0x0000157c -#define NV50TCL_PM_CONTROL(x) (0x00001580+((x)*4)) -#define NV50TCL_PM_CONTROL__SIZE 0x00000004 -#define NV50TCL_PM_CONTROL_UNK0 (1 << 0) -#define NV50TCL_PM_CONTROL_UNK1_SHIFT 4 -#define NV50TCL_PM_CONTROL_UNK1_MASK 0x00000070 -#define NV50TCL_PM_CONTROL_UNK2_SHIFT 8 -#define NV50TCL_PM_CONTROL_UNK2_MASK 0xffffff00 -#define NV50TCL_STENCIL_BACK_ENABLE 0x00001594 -#define NV50TCL_STENCIL_BACK_OP_FAIL 0x00001598 -#define NV50TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 -#define NV50TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a -#define NV50TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 -#define NV50TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 -#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 -#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL 0x0000159c -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_BACK_OP_ZPASS 0x000015a0 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a -#define 
NV50TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC 0x000015a4 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 -#define NV50TCL_FRAMEBUFFER_SRGB 0x000015b8 -#define NV50TCL_POLYGON_OFFSET_UNITS 0x000015bc -#define NV50TCL_GP_BUILTIN_RESULT_EN 0x000015cc -#define NV50TCL_GP_BUILTIN_RESULT_EN_VPORT_IDX (1 << 0) -#define NV50TCL_GP_BUILTIN_RESULT_EN_LAYER_IDX (1 << 16) -#define NV50TCL_MULTISAMPLE_MODE 0x000015d0 -#define NV50TCL_MULTISAMPLE_MODE_1X 0x00000000 -#define NV50TCL_MULTISAMPLE_MODE_2XMS 0x00000001 -#define NV50TCL_MULTISAMPLE_MODE_4XMS 0x00000002 -#define NV50TCL_MULTISAMPLE_MODE_8XMS 0x00000004 -#define NV50TCL_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008 -#define NV50TCL_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009 -#define NV50TCL_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a -#define NV50TCL_VERTEX_BEGIN 0x000015dc -#define NV50TCL_VERTEX_BEGIN_POINTS 0x00000000 -#define NV50TCL_VERTEX_BEGIN_LINES 0x00000001 -#define NV50TCL_VERTEX_BEGIN_LINE_LOOP 0x00000002 -#define NV50TCL_VERTEX_BEGIN_LINE_STRIP 0x00000003 -#define NV50TCL_VERTEX_BEGIN_TRIANGLES 0x00000004 -#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP 0x00000005 -#define NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN 0x00000006 -#define NV50TCL_VERTEX_BEGIN_QUADS 0x00000007 -#define 
NV50TCL_VERTEX_BEGIN_QUAD_STRIP 0x00000008 -#define NV50TCL_VERTEX_BEGIN_POLYGON 0x00000009 -#define NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY 0x0000000a -#define NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY 0x0000000b -#define NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY 0x0000000c -#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY 0x0000000d -#define NV50TCL_VERTEX_BEGIN_PATCHES 0x0000000e -#define NV50TCL_VERTEX_END 0x000015e0 -#define NV50TCL_EDGEFLAG_ENABLE 0x000015e4 -#define NV50TCL_VB_ELEMENT_U32 0x000015e8 -#define NV50TCL_VB_ELEMENT_U16_SETUP 0x000015ec -#define NV50TCL_VB_ELEMENT_U16_SETUP_OFFSET_SHIFT 30 -#define NV50TCL_VB_ELEMENT_U16_SETUP_OFFSET_MASK 0xc0000000 -#define NV50TCL_VB_ELEMENT_U16_SETUP_COUNT_SHIFT 0 -#define NV50TCL_VB_ELEMENT_U16_SETUP_COUNT_MASK 0x3fffffff -#define NV50TCL_VB_ELEMENT_U16 0x000015f0 -#define NV50TCL_VB_ELEMENT_U16_I0_SHIFT 0 -#define NV50TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff -#define NV50TCL_VB_ELEMENT_U16_I1_SHIFT 16 -#define NV50TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000 -#define NV50TCL_VERTEX_BASE_HIGH 0x000015f4 -#define NV50TCL_VERTEX_BASE_LOW 0x000015f8 -#define NV50TCL_VERTEX_DATA 0x00001640 -#define NV50TCL_PRIM_RESTART_ENABLE 0x00001644 -#define NV50TCL_PRIM_RESTART_INDEX 0x00001648 -#define NV50TCL_VP_GP_BUILTIN_ATTR_EN 0x0000164c -#define NV50TCL_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID (1 << 0) -#define NV50TCL_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID (1 << 4) -#define NV50TCL_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID (1 << 8) -#define NV50TCL_VP_GP_BUILTIN_ATTR_EN_UNK12 (1 << 12) -#define NV50TCL_VP_ATTR_EN_0 0x00001650 -#define NV50TCL_VP_ATTR_EN_0_7_SHIFT 28 -#define NV50TCL_VP_ATTR_EN_0_7_MASK 0xf0000000 -#define NV50TCL_VP_ATTR_EN_0_7_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_7_XNNN 0x10000000 -#define NV50TCL_VP_ATTR_EN_0_7_NYNN 0x20000000 -#define NV50TCL_VP_ATTR_EN_0_7_XYNN 0x30000000 -#define NV50TCL_VP_ATTR_EN_0_7_NNZN 0x40000000 -#define NV50TCL_VP_ATTR_EN_0_7_XNZN 0x50000000 -#define NV50TCL_VP_ATTR_EN_0_7_NYZN 0x60000000 
-#define NV50TCL_VP_ATTR_EN_0_7_XYZN 0x70000000 -#define NV50TCL_VP_ATTR_EN_0_7_NNNW 0x80000000 -#define NV50TCL_VP_ATTR_EN_0_7_XNNW 0x90000000 -#define NV50TCL_VP_ATTR_EN_0_7_NYNW 0xa0000000 -#define NV50TCL_VP_ATTR_EN_0_7_XYNW 0xb0000000 -#define NV50TCL_VP_ATTR_EN_0_7_NNZW 0xc0000000 -#define NV50TCL_VP_ATTR_EN_0_7_XNZW 0xd0000000 -#define NV50TCL_VP_ATTR_EN_0_7_NYZW 0xe0000000 -#define NV50TCL_VP_ATTR_EN_0_7_XYZW 0xf0000000 -#define NV50TCL_VP_ATTR_EN_0_6_SHIFT 24 -#define NV50TCL_VP_ATTR_EN_0_6_MASK 0x0f000000 -#define NV50TCL_VP_ATTR_EN_0_6_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_6_XNNN 0x01000000 -#define NV50TCL_VP_ATTR_EN_0_6_NYNN 0x02000000 -#define NV50TCL_VP_ATTR_EN_0_6_XYNN 0x03000000 -#define NV50TCL_VP_ATTR_EN_0_6_NNZN 0x04000000 -#define NV50TCL_VP_ATTR_EN_0_6_XNZN 0x05000000 -#define NV50TCL_VP_ATTR_EN_0_6_NYZN 0x06000000 -#define NV50TCL_VP_ATTR_EN_0_6_XYZN 0x07000000 -#define NV50TCL_VP_ATTR_EN_0_6_NNNW 0x08000000 -#define NV50TCL_VP_ATTR_EN_0_6_XNNW 0x09000000 -#define NV50TCL_VP_ATTR_EN_0_6_NYNW 0x0a000000 -#define NV50TCL_VP_ATTR_EN_0_6_XYNW 0x0b000000 -#define NV50TCL_VP_ATTR_EN_0_6_NNZW 0x0c000000 -#define NV50TCL_VP_ATTR_EN_0_6_XNZW 0x0d000000 -#define NV50TCL_VP_ATTR_EN_0_6_NYZW 0x0e000000 -#define NV50TCL_VP_ATTR_EN_0_6_XYZW 0x0f000000 -#define NV50TCL_VP_ATTR_EN_0_5_SHIFT 20 -#define NV50TCL_VP_ATTR_EN_0_5_MASK 0x00f00000 -#define NV50TCL_VP_ATTR_EN_0_5_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_5_XNNN 0x00100000 -#define NV50TCL_VP_ATTR_EN_0_5_NYNN 0x00200000 -#define NV50TCL_VP_ATTR_EN_0_5_XYNN 0x00300000 -#define NV50TCL_VP_ATTR_EN_0_5_NNZN 0x00400000 -#define NV50TCL_VP_ATTR_EN_0_5_XNZN 0x00500000 -#define NV50TCL_VP_ATTR_EN_0_5_NYZN 0x00600000 -#define NV50TCL_VP_ATTR_EN_0_5_XYZN 0x00700000 -#define NV50TCL_VP_ATTR_EN_0_5_NNNW 0x00800000 -#define NV50TCL_VP_ATTR_EN_0_5_XNNW 0x00900000 -#define NV50TCL_VP_ATTR_EN_0_5_NYNW 0x00a00000 -#define NV50TCL_VP_ATTR_EN_0_5_XYNW 0x00b00000 -#define NV50TCL_VP_ATTR_EN_0_5_NNZW 
0x00c00000 -#define NV50TCL_VP_ATTR_EN_0_5_XNZW 0x00d00000 -#define NV50TCL_VP_ATTR_EN_0_5_NYZW 0x00e00000 -#define NV50TCL_VP_ATTR_EN_0_5_XYZW 0x00f00000 -#define NV50TCL_VP_ATTR_EN_0_4_SHIFT 16 -#define NV50TCL_VP_ATTR_EN_0_4_MASK 0x000f0000 -#define NV50TCL_VP_ATTR_EN_0_4_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_4_XNNN 0x00010000 -#define NV50TCL_VP_ATTR_EN_0_4_NYNN 0x00020000 -#define NV50TCL_VP_ATTR_EN_0_4_XYNN 0x00030000 -#define NV50TCL_VP_ATTR_EN_0_4_NNZN 0x00040000 -#define NV50TCL_VP_ATTR_EN_0_4_XNZN 0x00050000 -#define NV50TCL_VP_ATTR_EN_0_4_NYZN 0x00060000 -#define NV50TCL_VP_ATTR_EN_0_4_XYZN 0x00070000 -#define NV50TCL_VP_ATTR_EN_0_4_NNNW 0x00080000 -#define NV50TCL_VP_ATTR_EN_0_4_XNNW 0x00090000 -#define NV50TCL_VP_ATTR_EN_0_4_NYNW 0x000a0000 -#define NV50TCL_VP_ATTR_EN_0_4_XYNW 0x000b0000 -#define NV50TCL_VP_ATTR_EN_0_4_NNZW 0x000c0000 -#define NV50TCL_VP_ATTR_EN_0_4_XNZW 0x000d0000 -#define NV50TCL_VP_ATTR_EN_0_4_NYZW 0x000e0000 -#define NV50TCL_VP_ATTR_EN_0_4_XYZW 0x000f0000 -#define NV50TCL_VP_ATTR_EN_0_3_SHIFT 12 -#define NV50TCL_VP_ATTR_EN_0_3_MASK 0x0000f000 -#define NV50TCL_VP_ATTR_EN_0_3_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_3_XNNN 0x00001000 -#define NV50TCL_VP_ATTR_EN_0_3_NYNN 0x00002000 -#define NV50TCL_VP_ATTR_EN_0_3_XYNN 0x00003000 -#define NV50TCL_VP_ATTR_EN_0_3_NNZN 0x00004000 -#define NV50TCL_VP_ATTR_EN_0_3_XNZN 0x00005000 -#define NV50TCL_VP_ATTR_EN_0_3_NYZN 0x00006000 -#define NV50TCL_VP_ATTR_EN_0_3_XYZN 0x00007000 -#define NV50TCL_VP_ATTR_EN_0_3_NNNW 0x00008000 -#define NV50TCL_VP_ATTR_EN_0_3_XNNW 0x00009000 -#define NV50TCL_VP_ATTR_EN_0_3_NYNW 0x0000a000 -#define NV50TCL_VP_ATTR_EN_0_3_XYNW 0x0000b000 -#define NV50TCL_VP_ATTR_EN_0_3_NNZW 0x0000c000 -#define NV50TCL_VP_ATTR_EN_0_3_XNZW 0x0000d000 -#define NV50TCL_VP_ATTR_EN_0_3_NYZW 0x0000e000 -#define NV50TCL_VP_ATTR_EN_0_3_XYZW 0x0000f000 -#define NV50TCL_VP_ATTR_EN_0_2_SHIFT 8 -#define NV50TCL_VP_ATTR_EN_0_2_MASK 0x00000f00 -#define NV50TCL_VP_ATTR_EN_0_2_NONE 
0x00000000 -#define NV50TCL_VP_ATTR_EN_0_2_XNNN 0x00000100 -#define NV50TCL_VP_ATTR_EN_0_2_NYNN 0x00000200 -#define NV50TCL_VP_ATTR_EN_0_2_XYNN 0x00000300 -#define NV50TCL_VP_ATTR_EN_0_2_NNZN 0x00000400 -#define NV50TCL_VP_ATTR_EN_0_2_XNZN 0x00000500 -#define NV50TCL_VP_ATTR_EN_0_2_NYZN 0x00000600 -#define NV50TCL_VP_ATTR_EN_0_2_XYZN 0x00000700 -#define NV50TCL_VP_ATTR_EN_0_2_NNNW 0x00000800 -#define NV50TCL_VP_ATTR_EN_0_2_XNNW 0x00000900 -#define NV50TCL_VP_ATTR_EN_0_2_NYNW 0x00000a00 -#define NV50TCL_VP_ATTR_EN_0_2_XYNW 0x00000b00 -#define NV50TCL_VP_ATTR_EN_0_2_NNZW 0x00000c00 -#define NV50TCL_VP_ATTR_EN_0_2_XNZW 0x00000d00 -#define NV50TCL_VP_ATTR_EN_0_2_NYZW 0x00000e00 -#define NV50TCL_VP_ATTR_EN_0_2_XYZW 0x00000f00 -#define NV50TCL_VP_ATTR_EN_0_1_SHIFT 4 -#define NV50TCL_VP_ATTR_EN_0_1_MASK 0x000000f0 -#define NV50TCL_VP_ATTR_EN_0_1_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_1_XNNN 0x00000010 -#define NV50TCL_VP_ATTR_EN_0_1_NYNN 0x00000020 -#define NV50TCL_VP_ATTR_EN_0_1_XYNN 0x00000030 -#define NV50TCL_VP_ATTR_EN_0_1_NNZN 0x00000040 -#define NV50TCL_VP_ATTR_EN_0_1_XNZN 0x00000050 -#define NV50TCL_VP_ATTR_EN_0_1_NYZN 0x00000060 -#define NV50TCL_VP_ATTR_EN_0_1_XYZN 0x00000070 -#define NV50TCL_VP_ATTR_EN_0_1_NNNW 0x00000080 -#define NV50TCL_VP_ATTR_EN_0_1_XNNW 0x00000090 -#define NV50TCL_VP_ATTR_EN_0_1_NYNW 0x000000a0 -#define NV50TCL_VP_ATTR_EN_0_1_XYNW 0x000000b0 -#define NV50TCL_VP_ATTR_EN_0_1_NNZW 0x000000c0 -#define NV50TCL_VP_ATTR_EN_0_1_XNZW 0x000000d0 -#define NV50TCL_VP_ATTR_EN_0_1_NYZW 0x000000e0 -#define NV50TCL_VP_ATTR_EN_0_1_XYZW 0x000000f0 -#define NV50TCL_VP_ATTR_EN_0_0_SHIFT 0 -#define NV50TCL_VP_ATTR_EN_0_0_MASK 0x0000000f -#define NV50TCL_VP_ATTR_EN_0_0_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_0_XNNN 0x00000001 -#define NV50TCL_VP_ATTR_EN_0_0_NYNN 0x00000002 -#define NV50TCL_VP_ATTR_EN_0_0_XYNN 0x00000003 -#define NV50TCL_VP_ATTR_EN_0_0_NNZN 0x00000004 -#define NV50TCL_VP_ATTR_EN_0_0_XNZN 0x00000005 -#define NV50TCL_VP_ATTR_EN_0_0_NYZN 
0x00000006 -#define NV50TCL_VP_ATTR_EN_0_0_XYZN 0x00000007 -#define NV50TCL_VP_ATTR_EN_0_0_NNNW 0x00000008 -#define NV50TCL_VP_ATTR_EN_0_0_XNNW 0x00000009 -#define NV50TCL_VP_ATTR_EN_0_0_NYNW 0x0000000a -#define NV50TCL_VP_ATTR_EN_0_0_XYNW 0x0000000b -#define NV50TCL_VP_ATTR_EN_0_0_NNZW 0x0000000c -#define NV50TCL_VP_ATTR_EN_0_0_XNZW 0x0000000d -#define NV50TCL_VP_ATTR_EN_0_0_NYZW 0x0000000e -#define NV50TCL_VP_ATTR_EN_0_0_XYZW 0x0000000f -#define NV50TCL_VP_ATTR_EN_1 0x00001654 -#define NV50TCL_VP_ATTR_EN_1_15_SHIFT 28 -#define NV50TCL_VP_ATTR_EN_1_15_MASK 0xf0000000 -#define NV50TCL_VP_ATTR_EN_1_15_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_15_XNNN 0x10000000 -#define NV50TCL_VP_ATTR_EN_1_15_NYNN 0x20000000 -#define NV50TCL_VP_ATTR_EN_1_15_XYNN 0x30000000 -#define NV50TCL_VP_ATTR_EN_1_15_NNZN 0x40000000 -#define NV50TCL_VP_ATTR_EN_1_15_XNZN 0x50000000 -#define NV50TCL_VP_ATTR_EN_1_15_NYZN 0x60000000 -#define NV50TCL_VP_ATTR_EN_1_15_XYZN 0x70000000 -#define NV50TCL_VP_ATTR_EN_1_15_NNNW 0x80000000 -#define NV50TCL_VP_ATTR_EN_1_15_XNNW 0x90000000 -#define NV50TCL_VP_ATTR_EN_1_15_NYNW 0xa0000000 -#define NV50TCL_VP_ATTR_EN_1_15_XYNW 0xb0000000 -#define NV50TCL_VP_ATTR_EN_1_15_NNZW 0xc0000000 -#define NV50TCL_VP_ATTR_EN_1_15_XNZW 0xd0000000 -#define NV50TCL_VP_ATTR_EN_1_15_NYZW 0xe0000000 -#define NV50TCL_VP_ATTR_EN_1_15_XYZW 0xf0000000 -#define NV50TCL_VP_ATTR_EN_1_14_SHIFT 24 -#define NV50TCL_VP_ATTR_EN_1_14_MASK 0x0f000000 -#define NV50TCL_VP_ATTR_EN_1_14_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_14_XNNN 0x01000000 -#define NV50TCL_VP_ATTR_EN_1_14_NYNN 0x02000000 -#define NV50TCL_VP_ATTR_EN_1_14_XYNN 0x03000000 -#define NV50TCL_VP_ATTR_EN_1_14_NNZN 0x04000000 -#define NV50TCL_VP_ATTR_EN_1_14_XNZN 0x05000000 -#define NV50TCL_VP_ATTR_EN_1_14_NYZN 0x06000000 -#define NV50TCL_VP_ATTR_EN_1_14_XYZN 0x07000000 -#define NV50TCL_VP_ATTR_EN_1_14_NNNW 0x08000000 -#define NV50TCL_VP_ATTR_EN_1_14_XNNW 0x09000000 -#define NV50TCL_VP_ATTR_EN_1_14_NYNW 0x0a000000 -#define 
NV50TCL_VP_ATTR_EN_1_14_XYNW 0x0b000000 -#define NV50TCL_VP_ATTR_EN_1_14_NNZW 0x0c000000 -#define NV50TCL_VP_ATTR_EN_1_14_XNZW 0x0d000000 -#define NV50TCL_VP_ATTR_EN_1_14_NYZW 0x0e000000 -#define NV50TCL_VP_ATTR_EN_1_14_XYZW 0x0f000000 -#define NV50TCL_VP_ATTR_EN_1_13_SHIFT 20 -#define NV50TCL_VP_ATTR_EN_1_13_MASK 0x00f00000 -#define NV50TCL_VP_ATTR_EN_1_13_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_13_XNNN 0x00100000 -#define NV50TCL_VP_ATTR_EN_1_13_NYNN 0x00200000 -#define NV50TCL_VP_ATTR_EN_1_13_XYNN 0x00300000 -#define NV50TCL_VP_ATTR_EN_1_13_NNZN 0x00400000 -#define NV50TCL_VP_ATTR_EN_1_13_XNZN 0x00500000 -#define NV50TCL_VP_ATTR_EN_1_13_NYZN 0x00600000 -#define NV50TCL_VP_ATTR_EN_1_13_XYZN 0x00700000 -#define NV50TCL_VP_ATTR_EN_1_13_NNNW 0x00800000 -#define NV50TCL_VP_ATTR_EN_1_13_XNNW 0x00900000 -#define NV50TCL_VP_ATTR_EN_1_13_NYNW 0x00a00000 -#define NV50TCL_VP_ATTR_EN_1_13_XYNW 0x00b00000 -#define NV50TCL_VP_ATTR_EN_1_13_NNZW 0x00c00000 -#define NV50TCL_VP_ATTR_EN_1_13_XNZW 0x00d00000 -#define NV50TCL_VP_ATTR_EN_1_13_NYZW 0x00e00000 -#define NV50TCL_VP_ATTR_EN_1_13_XYZW 0x00f00000 -#define NV50TCL_VP_ATTR_EN_1_12_SHIFT 16 -#define NV50TCL_VP_ATTR_EN_1_12_MASK 0x000f0000 -#define NV50TCL_VP_ATTR_EN_1_12_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_12_XNNN 0x00010000 -#define NV50TCL_VP_ATTR_EN_1_12_NYNN 0x00020000 -#define NV50TCL_VP_ATTR_EN_1_12_XYNN 0x00030000 -#define NV50TCL_VP_ATTR_EN_1_12_NNZN 0x00040000 -#define NV50TCL_VP_ATTR_EN_1_12_XNZN 0x00050000 -#define NV50TCL_VP_ATTR_EN_1_12_NYZN 0x00060000 -#define NV50TCL_VP_ATTR_EN_1_12_XYZN 0x00070000 -#define NV50TCL_VP_ATTR_EN_1_12_NNNW 0x00080000 -#define NV50TCL_VP_ATTR_EN_1_12_XNNW 0x00090000 -#define NV50TCL_VP_ATTR_EN_1_12_NYNW 0x000a0000 -#define NV50TCL_VP_ATTR_EN_1_12_XYNW 0x000b0000 -#define NV50TCL_VP_ATTR_EN_1_12_NNZW 0x000c0000 -#define NV50TCL_VP_ATTR_EN_1_12_XNZW 0x000d0000 -#define NV50TCL_VP_ATTR_EN_1_12_NYZW 0x000e0000 -#define NV50TCL_VP_ATTR_EN_1_12_XYZW 0x000f0000 -#define 
NV50TCL_VP_ATTR_EN_1_11_SHIFT 12 -#define NV50TCL_VP_ATTR_EN_1_11_MASK 0x0000f000 -#define NV50TCL_VP_ATTR_EN_1_11_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_11_XNNN 0x00001000 -#define NV50TCL_VP_ATTR_EN_1_11_NYNN 0x00002000 -#define NV50TCL_VP_ATTR_EN_1_11_XYNN 0x00003000 -#define NV50TCL_VP_ATTR_EN_1_11_NNZN 0x00004000 -#define NV50TCL_VP_ATTR_EN_1_11_XNZN 0x00005000 -#define NV50TCL_VP_ATTR_EN_1_11_NYZN 0x00006000 -#define NV50TCL_VP_ATTR_EN_1_11_XYZN 0x00007000 -#define NV50TCL_VP_ATTR_EN_1_11_NNNW 0x00008000 -#define NV50TCL_VP_ATTR_EN_1_11_XNNW 0x00009000 -#define NV50TCL_VP_ATTR_EN_1_11_NYNW 0x0000a000 -#define NV50TCL_VP_ATTR_EN_1_11_XYNW 0x0000b000 -#define NV50TCL_VP_ATTR_EN_1_11_NNZW 0x0000c000 -#define NV50TCL_VP_ATTR_EN_1_11_XNZW 0x0000d000 -#define NV50TCL_VP_ATTR_EN_1_11_NYZW 0x0000e000 -#define NV50TCL_VP_ATTR_EN_1_11_XYZW 0x0000f000 -#define NV50TCL_VP_ATTR_EN_1_10_SHIFT 8 -#define NV50TCL_VP_ATTR_EN_1_10_MASK 0x00000f00 -#define NV50TCL_VP_ATTR_EN_1_10_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_10_XNNN 0x00000100 -#define NV50TCL_VP_ATTR_EN_1_10_NYNN 0x00000200 -#define NV50TCL_VP_ATTR_EN_1_10_XYNN 0x00000300 -#define NV50TCL_VP_ATTR_EN_1_10_NNZN 0x00000400 -#define NV50TCL_VP_ATTR_EN_1_10_XNZN 0x00000500 -#define NV50TCL_VP_ATTR_EN_1_10_NYZN 0x00000600 -#define NV50TCL_VP_ATTR_EN_1_10_XYZN 0x00000700 -#define NV50TCL_VP_ATTR_EN_1_10_NNNW 0x00000800 -#define NV50TCL_VP_ATTR_EN_1_10_XNNW 0x00000900 -#define NV50TCL_VP_ATTR_EN_1_10_NYNW 0x00000a00 -#define NV50TCL_VP_ATTR_EN_1_10_XYNW 0x00000b00 -#define NV50TCL_VP_ATTR_EN_1_10_NNZW 0x00000c00 -#define NV50TCL_VP_ATTR_EN_1_10_XNZW 0x00000d00 -#define NV50TCL_VP_ATTR_EN_1_10_NYZW 0x00000e00 -#define NV50TCL_VP_ATTR_EN_1_10_XYZW 0x00000f00 -#define NV50TCL_VP_ATTR_EN_1_9_SHIFT 4 -#define NV50TCL_VP_ATTR_EN_1_9_MASK 0x000000f0 -#define NV50TCL_VP_ATTR_EN_1_9_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_9_XNNN 0x00000010 -#define NV50TCL_VP_ATTR_EN_1_9_NYNN 0x00000020 -#define 
NV50TCL_VP_ATTR_EN_1_9_XYNN 0x00000030 -#define NV50TCL_VP_ATTR_EN_1_9_NNZN 0x00000040 -#define NV50TCL_VP_ATTR_EN_1_9_XNZN 0x00000050 -#define NV50TCL_VP_ATTR_EN_1_9_NYZN 0x00000060 -#define NV50TCL_VP_ATTR_EN_1_9_XYZN 0x00000070 -#define NV50TCL_VP_ATTR_EN_1_9_NNNW 0x00000080 -#define NV50TCL_VP_ATTR_EN_1_9_XNNW 0x00000090 -#define NV50TCL_VP_ATTR_EN_1_9_NYNW 0x000000a0 -#define NV50TCL_VP_ATTR_EN_1_9_XYNW 0x000000b0 -#define NV50TCL_VP_ATTR_EN_1_9_NNZW 0x000000c0 -#define NV50TCL_VP_ATTR_EN_1_9_XNZW 0x000000d0 -#define NV50TCL_VP_ATTR_EN_1_9_NYZW 0x000000e0 -#define NV50TCL_VP_ATTR_EN_1_9_XYZW 0x000000f0 -#define NV50TCL_VP_ATTR_EN_1_8_SHIFT 0 -#define NV50TCL_VP_ATTR_EN_1_8_MASK 0x0000000f -#define NV50TCL_VP_ATTR_EN_1_8_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_8_XNNN 0x00000001 -#define NV50TCL_VP_ATTR_EN_1_8_NYNN 0x00000002 -#define NV50TCL_VP_ATTR_EN_1_8_XYNN 0x00000003 -#define NV50TCL_VP_ATTR_EN_1_8_NNZN 0x00000004 -#define NV50TCL_VP_ATTR_EN_1_8_XNZN 0x00000005 -#define NV50TCL_VP_ATTR_EN_1_8_NYZN 0x00000006 -#define NV50TCL_VP_ATTR_EN_1_8_XYZN 0x00000007 -#define NV50TCL_VP_ATTR_EN_1_8_NNNW 0x00000008 -#define NV50TCL_VP_ATTR_EN_1_8_XNNW 0x00000009 -#define NV50TCL_VP_ATTR_EN_1_8_NYNW 0x0000000a -#define NV50TCL_VP_ATTR_EN_1_8_XYNW 0x0000000b -#define NV50TCL_VP_ATTR_EN_1_8_NNZW 0x0000000c -#define NV50TCL_VP_ATTR_EN_1_8_XNZW 0x0000000d -#define NV50TCL_VP_ATTR_EN_1_8_NYZW 0x0000000e -#define NV50TCL_VP_ATTR_EN_1_8_XYZW 0x0000000f -#define NV50TCL_POINT_SPRITE_CTRL 0x00001660 -#define NV50TCL_LINE_STIPPLE_ENABLE 0x0000166c -#define NV50TCL_LINE_STIPPLE_PATTERN 0x00001680 -#define NV50TCL_PROVOKING_VERTEX_LAST 0x00001684 -#define NV50TCL_VERTEX_TWO_SIDE_ENABLE 0x00001688 -#define NV50TCL_POLYGON_STIPPLE_ENABLE 0x0000168c -#define NV50TCL_SET_PROGRAM_CB 0x00001694 -#define NV50TCL_SET_PROGRAM_CB_PROGRAM_SHIFT 4 -#define NV50TCL_SET_PROGRAM_CB_PROGRAM_MASK 0x000000f0 -#define NV50TCL_SET_PROGRAM_CB_PROGRAM_VERTEX 0x00000000 -#define 
NV50TCL_SET_PROGRAM_CB_PROGRAM_GEOMETRY 0x00000020 -#define NV50TCL_SET_PROGRAM_CB_PROGRAM_FRAGMENT 0x00000030 -#define NV50TCL_SET_PROGRAM_CB_INDEX_SHIFT 8 -#define NV50TCL_SET_PROGRAM_CB_INDEX_MASK 0x00000f00 -#define NV50TCL_SET_PROGRAM_CB_BUFFER_SHIFT 12 -#define NV50TCL_SET_PROGRAM_CB_BUFFER_MASK 0x0007f000 -#define NV50TCL_SET_PROGRAM_CB_VALID (1 << 0) -#define NV50TCL_VP_RESULT_MAP_SIZE 0x000016ac -#define NV50TCL_VP_REG_ALLOC_TEMP 0x000016b0 -#define NV50TCL_VP_REG_ALLOC_RESULT 0x000016b8 -#define NV50TCL_VP_RESULT_MAP(x) (0x000016bc+((x)*4)) -#define NV50TCL_VP_RESULT_MAP__SIZE 0x00000010 -#define NV50TCL_VP_RESULT_MAP_0_SHIFT 0 -#define NV50TCL_VP_RESULT_MAP_0_MASK 0x000000ff -#define NV50TCL_VP_RESULT_MAP_1_SHIFT 8 -#define NV50TCL_VP_RESULT_MAP_1_MASK 0x0000ff00 -#define NV50TCL_VP_RESULT_MAP_2_SHIFT 16 -#define NV50TCL_VP_RESULT_MAP_2_MASK 0x00ff0000 -#define NV50TCL_VP_RESULT_MAP_3_SHIFT 24 -#define NV50TCL_VP_RESULT_MAP_3_MASK 0xff000000 -#define NV50TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001700+((x)*4)) -#define NV50TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 -#define NV50TCL_GP_ENABLE 0x00001798 -#define NV50TCL_GP_REG_ALLOC_TEMP 0x000017a0 -#define NV50TCL_GP_REG_ALLOC_RESULT 0x000017a8 -#define NV50TCL_GP_RESULT_MAP_SIZE 0x000017ac -#define NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE 0x000017b0 -#define NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_POINTS 0x00000001 -#define NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP 0x00000002 -#define NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP 0x00000003 -#define NV50TCL_RASTERIZE_ENABLE 0x000017b4 -#define NV50TCL_STRMOUT_ENABLE 0x000017b8 -#define NV50TCL_GP_RESULT_MAP(x) (0x000017fc+((x)*4)) -#define NV50TCL_GP_RESULT_MAP__SIZE 0x00000020 -#define NV50TCL_GP_RESULT_MAP_0_SHIFT 0 -#define NV50TCL_GP_RESULT_MAP_0_MASK 0x000000ff -#define NV50TCL_GP_RESULT_MAP_1_SHIFT 8 -#define NV50TCL_GP_RESULT_MAP_1_MASK 0x0000ff00 -#define NV50TCL_GP_RESULT_MAP_2_SHIFT 16 -#define NV50TCL_GP_RESULT_MAP_2_MASK 0x00ff0000 -#define 
NV50TCL_GP_RESULT_MAP_3_SHIFT 24 -#define NV50TCL_GP_RESULT_MAP_3_MASK 0xff000000 -#define NV50TCL_MAP_SEMANTIC_0 0x00001904 -#define NV50TCL_MAP_SEMANTIC_0_FFC0_ID_SHIFT 0 -#define NV50TCL_MAP_SEMANTIC_0_FFC0_ID_MASK 0x000000ff -#define NV50TCL_MAP_SEMANTIC_0_BFC0_ID_SHIFT 8 -#define NV50TCL_MAP_SEMANTIC_0_BFC0_ID_MASK 0x0000ff00 -#define NV50TCL_MAP_SEMANTIC_0_COLR_NR_SHIFT 16 -#define NV50TCL_MAP_SEMANTIC_0_COLR_NR_MASK 0x00ff0000 -#define NV50TCL_MAP_SEMANTIC_0_CLMP_EN_SHIFT 24 -#define NV50TCL_MAP_SEMANTIC_0_CLMP_EN_MASK 0xff000000 -#define NV50TCL_MAP_SEMANTIC_1 0x00001908 -#define NV50TCL_MAP_SEMANTIC_1_CLIP_LO_SHIFT 0 -#define NV50TCL_MAP_SEMANTIC_1_CLIP_LO_MASK 0x000000ff -#define NV50TCL_MAP_SEMANTIC_1_CLIP_HI_SHIFT 8 -#define NV50TCL_MAP_SEMANTIC_1_CLIP_HI_MASK 0x0000ff00 -#define NV50TCL_MAP_SEMANTIC_2 0x0000190c -#define NV50TCL_MAP_SEMANTIC_2_LAYER_ID_SHIFT 0 -#define NV50TCL_MAP_SEMANTIC_2_LAYER_ID_MASK 0x000000ff -#define NV50TCL_MAP_SEMANTIC_3 0x00001910 -#define NV50TCL_MAP_SEMANTIC_3_PTSZ_EN (1 << 0) -#define NV50TCL_MAP_SEMANTIC_3_PTSZ_ID_SHIFT 4 -#define NV50TCL_MAP_SEMANTIC_3_PTSZ_ID_MASK 0x00000ff0 -#define NV50TCL_MAP_SEMANTIC_4 0x00001914 -#define NV50TCL_MAP_SEMANTIC_4_PRIM_ID_SHIFT 0 -#define NV50TCL_MAP_SEMANTIC_4_PRIM_ID_MASK 0x000000ff -#define NV50TCL_CULL_FACE_ENABLE 0x00001918 -#define NV50TCL_FRONT_FACE 0x0000191c -#define NV50TCL_FRONT_FACE_CW 0x00000900 -#define NV50TCL_FRONT_FACE_CCW 0x00000901 -#define NV50TCL_CULL_FACE 0x00001920 -#define NV50TCL_CULL_FACE_FRONT 0x00000404 -#define NV50TCL_CULL_FACE_BACK 0x00000405 -#define NV50TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 -#define NV50TCL_VIEWPORT_TRANSFORM_EN 0x0000192c -#define NV50TCL_VIEW_VOLUME_CLIP_CTRL 0x0000193c -#define NV50TCL_VIEWPORT_CLIP_RECTS_EN 0x0000194c -#define NV50TCL_VIEWPORT_CLIP_MODE 0x00001950 -#define NV50TCL_VIEWPORT_CLIP_MODE_INCLUDE 0x00000000 -#define NV50TCL_VIEWPORT_CLIP_MODE_EXCLUDE 0x00000001 -#define NV50TCL_VIEWPORT_CLIP_MODE_UNKNOWN 0x00000002 
-#define NV50TCL_FP_CTRL_UNK196C 0x0000196c -#define NV50TCL_CLIPID_ENABLE 0x0000197c -#define NV50TCL_CLIPID_WIDTH 0x00001980 -#define NV50TCL_CLIPID_ID 0x00001984 -#define NV50TCL_FP_INTERPOLANT_CTRL 0x00001988 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_SHIFT 24 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_MASK 0xff000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NONE 0x00000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XNNN 0x01000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NYNN 0x02000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XYNN 0x03000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NNZN 0x04000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XNZN 0x05000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NYZN 0x06000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XYZN 0x07000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NNNW 0x08000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XNNW 0x09000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NYNW 0x0a000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XYNW 0x0b000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NNZW 0x0c000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XNZW 0x0d000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_NYZW 0x0e000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_XYZW 0x0f000000 -#define NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT 16 -#define NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_MASK 0x00ff0000 -#define NV50TCL_FP_INTERPOLANT_CTRL_OFFSET_SHIFT 8 -#define NV50TCL_FP_INTERPOLANT_CTRL_OFFSET_MASK 0x0000ff00 -#define NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT 0 -#define NV50TCL_FP_INTERPOLANT_CTRL_COUNT_MASK 0x000000ff -#define NV50TCL_FP_REG_ALLOC_TEMP 0x0000198c -#define NV50TCL_REG_MODE 0x000019a0 -#define NV50TCL_REG_MODE_PACKED 0x00000001 -#define NV50TCL_REG_MODE_STRIPED 0x00000002 -#define NV50TCL_FP_CONTROL 0x000019a8 -#define NV50TCL_FP_CONTROL_MULTIPLE_RESULTS (1 << 0) -#define NV50TCL_FP_CONTROL_EXPORTS_Z (1 << 8) -#define NV50TCL_FP_CONTROL_USES_KIL (1 << 20) -#define 
NV50TCL_DEPTH_BOUNDS_EN 0x000019bc -#define NV50TCL_LOGIC_OP_ENABLE 0x000019c4 -#define NV50TCL_LOGIC_OP 0x000019c8 -#define NV50TCL_LOGIC_OP_CLEAR 0x00001500 -#define NV50TCL_LOGIC_OP_AND 0x00001501 -#define NV50TCL_LOGIC_OP_AND_REVERSE 0x00001502 -#define NV50TCL_LOGIC_OP_COPY 0x00001503 -#define NV50TCL_LOGIC_OP_AND_INVERTED 0x00001504 -#define NV50TCL_LOGIC_OP_NOOP 0x00001505 -#define NV50TCL_LOGIC_OP_XOR 0x00001506 -#define NV50TCL_LOGIC_OP_OR 0x00001507 -#define NV50TCL_LOGIC_OP_NOR 0x00001508 -#define NV50TCL_LOGIC_OP_EQUIV 0x00001509 -#define NV50TCL_LOGIC_OP_INVERT 0x0000150a -#define NV50TCL_LOGIC_OP_OR_REVERSE 0x0000150b -#define NV50TCL_LOGIC_OP_COPY_INVERTED 0x0000150c -#define NV50TCL_LOGIC_OP_OR_INVERTED 0x0000150d -#define NV50TCL_LOGIC_OP_NAND 0x0000150e -#define NV50TCL_LOGIC_OP_SET 0x0000150f -#define NV50TCL_CLEAR_BUFFERS 0x000019d0 -#define NV50TCL_CLEAR_BUFFERS_Z (1 << 0) -#define NV50TCL_CLEAR_BUFFERS_S (1 << 1) -#define NV50TCL_CLEAR_BUFFERS_R (1 << 2) -#define NV50TCL_CLEAR_BUFFERS_G (1 << 3) -#define NV50TCL_CLEAR_BUFFERS_B (1 << 4) -#define NV50TCL_CLEAR_BUFFERS_A (1 << 5) -#define NV50TCL_CLEAR_BUFFERS_RT_SHIFT 6 -#define NV50TCL_CLEAR_BUFFERS_RT_MASK 0x000003c0 -#define NV50TCL_CLEAR_BUFFERS_LAYER_SHIFT 10 -#define NV50TCL_CLEAR_BUFFERS_LAYER_MASK 0x0007fc00 -#define NV50TCL_COLOR_MASK(x) (0x00001a00+((x)*4)) -#define NV50TCL_COLOR_MASK__SIZE 0x00000008 -#define NV50TCL_COLOR_MASK_R_SHIFT 0 -#define NV50TCL_COLOR_MASK_R_MASK 0x0000000f -#define NV50TCL_COLOR_MASK_G_SHIFT 4 -#define NV50TCL_COLOR_MASK_G_MASK 0x000000f0 -#define NV50TCL_COLOR_MASK_B_SHIFT 8 -#define NV50TCL_COLOR_MASK_B_MASK 0x00000f00 -#define NV50TCL_COLOR_MASK_A_SHIFT 12 -#define NV50TCL_COLOR_MASK_A_MASK 0x0000f000 -#define NV50TCL_STRMOUT_ADDRESS_HIGH(x) (0x00001a80+((x)*16)) -#define NV50TCL_STRMOUT_ADDRESS_HIGH__SIZE 0x00000004 -#define NV50TCL_STRMOUT_ADDRESS_LOW(x) (0x00001a84+((x)*16)) -#define NV50TCL_STRMOUT_ADDRESS_LOW__SIZE 0x00000004 -#define 
NV50TCL_STRMOUT_NUM_ATTRIBS(x) (0x00001a88+((x)*16)) -#define NV50TCL_STRMOUT_NUM_ATTRIBS__SIZE 0x00000004 -#define NV50TCL_VERTEX_ARRAY_ATTRIB(x) (0x00001ac0+((x)*4)) -#define NV50TCL_VERTEX_ARRAY_ATTRIB__SIZE 0x00000010 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_BUFFER_SHIFT 0 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_BUFFER_MASK 0x0000000f -#define NV50TCL_VERTEX_ARRAY_ATTRIB_CONST (1 << 4) -#define NV50TCL_VERTEX_ARRAY_ATTRIB_OFFSET_SHIFT 5 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_OFFSET_MASK 0x0007ffe0 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_SHIFT 19 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_MASK 0x01f80000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 0x00080000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32 0x00100000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16 0x00180000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32 0x00200000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16 0x00280000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8 0x00500000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16 0x00780000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32 0x00900000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8 0x00980000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_2_10_10_10 0x01800000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SHIFT 25 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_MASK 0x0e000000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x0e000000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x02000000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x04000000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x0a000000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x0c000000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x08000000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x06000000 -#define NV50TCL_VERTEX_ARRAY_ATTRIB_BGRA (1 << 31) 
-#define NV50TCL_QUERY_ADDRESS_HIGH 0x00001b00 -#define NV50TCL_QUERY_ADDRESS_LOW 0x00001b04 -#define NV50TCL_QUERY_SEQUENCE 0x00001b08 -#define NV50TCL_QUERY_GET 0x00001b0c - - -#define NV84TCL 0x00008297 - - - -#define NVA0TCL 0x00008397 - - - -#define NVA8TCL 0x00008597 - - - -#define NV50_COMPUTE 0x000050c0 - -#define NV50_COMPUTE_NOP 0x00000100 -#define NV50_COMPUTE_NOTIFY 0x00000104 -#define NV50_COMPUTE_SERIALIZE 0x00000110 -#define NV50_COMPUTE_DMA_NOTIFY 0x00000180 -#define NV50_COMPUTE_DMA_GLOBAL 0x000001a0 -#define NV50_COMPUTE_DMA_QUERY 0x000001a4 -#define NV50_COMPUTE_DMA_LOCAL 0x000001b8 -#define NV50_COMPUTE_DMA_STACK 0x000001bc -#define NV50_COMPUTE_DMA_CODE_CB 0x000001c0 -#define NV50_COMPUTE_DMA_TSC 0x000001c4 -#define NV50_COMPUTE_DMA_TIC 0x000001c8 -#define NV50_COMPUTE_DMA_TEXTURE 0x000001cc -#define NV50_COMPUTE_CP_ADDRESS_HIGH 0x00000210 -#define NV50_COMPUTE_CP_ADDRESS_LOW 0x00000214 -#define NV50_COMPUTE_STACK_ADDRESS_HIGH 0x00000218 -#define NV50_COMPUTE_STACK_ADDRESS_LOW 0x0000021c -#define NV50_COMPUTE_STACK_SIZE_LOG 0x00000220 -#define NV50_COMPUTE_TSC_ADDRESS_HIGH 0x0000022c -#define NV50_COMPUTE_TSC_ADDRESS_LOW 0x00000230 -#define NV50_COMPUTE_TSC_LIMIT 0x00000234 -#define NV50_COMPUTE_CB_ADDR 0x00000238 -#define NV50_COMPUTE_CB_ADDR_ID_SHIFT 8 -#define NV50_COMPUTE_CB_ADDR_ID_MASK 0x003fff00 -#define NV50_COMPUTE_CB_ADDR_BUFFER_SHIFT 0 -#define NV50_COMPUTE_CB_ADDR_BUFFER_MASK 0x0000007f -#define NV50_COMPUTE_CB_DATA(x) (0x0000023c+((x)*4)) -#define NV50_COMPUTE_CB_DATA__SIZE 0x00000010 -#define NV50_COMPUTE_DELAY1 0x00000284 -#define NV50_COMPUTE_WATCHDOG_TIMER 0x00000288 -#define NV50_COMPUTE_DELAY2 0x0000028c -#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH 0x00000294 -#define NV50_COMPUTE_LOCAL_ADDRESS_LOW 0x00000298 -#define NV50_COMPUTE_LOCAL_SIZE_LOG 0x0000029c -#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH 0x000002a4 -#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW 0x000002a8 -#define NV50_COMPUTE_CB_DEF_SET 0x000002ac -#define 
NV50_COMPUTE_CB_DEF_SET_SIZE_SHIFT 0 -#define NV50_COMPUTE_CB_DEF_SET_SIZE_MASK 0x0000ffff -#define NV50_COMPUTE_CB_DEF_SET_BUFFER_SHIFT 16 -#define NV50_COMPUTE_CB_DEF_SET_BUFFER_MASK 0x007f0000 -#define NV50_COMPUTE_BLOCK_ALLOC 0x000002b4 -#define NV50_COMPUTE_BLOCK_ALLOC_THREADS_SHIFT 0 -#define NV50_COMPUTE_BLOCK_ALLOC_THREADS_MASK 0x0000ffff -#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS_SHIFT 16 -#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS_MASK 0xffff0000 -#define NV50_COMPUTE_LANES32_ENABLE 0x000002b8 -#define NV50_COMPUTE_CP_REG_ALLOC_TEMP 0x000002c0 -#define NV50_COMPUTE_TIC_ADDRESS_HIGH 0x000002c4 -#define NV50_COMPUTE_TIC_ADDRESS_LOW 0x000002c8 -#define NV50_COMPUTE_TIC_LIMIT 0x000002cc -#define NV50_COMPUTE_PM_SET(x) (0x000002d0+((x)*4)) -#define NV50_COMPUTE_PM_SET__SIZE 0x00000004 -#define NV50_COMPUTE_PM_CONTROL(x) (0x000002e0+((x)*4)) -#define NV50_COMPUTE_PM_CONTROL__SIZE 0x00000004 -#define NV50_COMPUTE_PM_CONTROL_UNK0 (1 << 0) -#define NV50_COMPUTE_PM_CONTROL_UNK1_SHIFT 4 -#define NV50_COMPUTE_PM_CONTROL_UNK1_MASK 0x00000070 -#define NV50_COMPUTE_PM_CONTROL_UNK2_SHIFT 8 -#define NV50_COMPUTE_PM_CONTROL_UNK2_MASK 0xffffff00 -#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC 0x000002fc -#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP 0x00000300 -#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC 0x00000304 -#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP 0x00000308 -#define NV50_COMPUTE_QUERY_ADDRESS_HIGH 0x00000310 -#define NV50_COMPUTE_QUERY_ADDRESS_LOW 0x00000314 -#define NV50_COMPUTE_QUERY_COUNTER 0x00000318 -#define NV50_COMPUTE_QUERY_GET 0x0000031c -#define NV50_COMPUTE_COND_ADDRESS_HIGH 0x00000320 -#define NV50_COMPUTE_COND_ADDRESS_LOW 0x00000324 -#define NV50_COMPUTE_COND_MODE 0x00000328 -#define NV50_COMPUTE_COND_MODE_NEVER 0x00000000 -#define NV50_COMPUTE_COND_MODE_ALWAYS 0x00000001 -#define NV50_COMPUTE_COND_MODE_RES 0x00000002 -#define NV50_COMPUTE_COND_MODE_NOT_RES_AND_NOT_ID 0x00000003 -#define NV50_COMPUTE_COND_MODE_RES_OR_ID 0x00000004 -#define NV50_COMPUTE_LAUNCH 
0x00000368 -#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374 -#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT_SHIFT 8 -#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT_MASK 0x0000ff00 -#define NV50_COMPUTE_LINKED_TSC 0x00000378 -#define NV50_COMPUTE_CODE_CB_FLUSH 0x00000380 -#define NV50_COMPUTE_GRIDDIM 0x000003a4 -#define NV50_COMPUTE_GRIDDIM_X_SHIFT 0 -#define NV50_COMPUTE_GRIDDIM_X_MASK 0x0000ffff -#define NV50_COMPUTE_GRIDDIM_Y_SHIFT 16 -#define NV50_COMPUTE_GRIDDIM_Y_MASK 0xffff0000 -#define NV50_COMPUTE_SHARED_SIZE 0x000003a8 -#define NV50_COMPUTE_BLOCKDIM_YX 0x000003ac -#define NV50_COMPUTE_BLOCKDIM_YX_X_SHIFT 0 -#define NV50_COMPUTE_BLOCKDIM_YX_X_MASK 0x0000ffff -#define NV50_COMPUTE_BLOCKDIM_YX_Y_SHIFT 16 -#define NV50_COMPUTE_BLOCKDIM_YX_Y_MASK 0xffff0000 -#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0 -#define NV50_COMPUTE_CP_START_ID 0x000003b4 -#define NV50_COMPUTE_REG_MODE 0x000003b8 -#define NV50_COMPUTE_REG_MODE_PACKED 0x00000001 -#define NV50_COMPUTE_REG_MODE_STRIPED 0x00000002 -#define NV50_COMPUTE_TEX_LIMITS 0x000003bc -#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2_SHIFT 0 -#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2_MASK 0x0000000f -#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2_SHIFT 4 -#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2_MASK 0x000000f0 -#define NV50_COMPUTE_BIND_TSC 0x000003c0 -#define NV50_COMPUTE_BIND_TSC_VALID (1 << 0) -#define NV50_COMPUTE_BIND_TSC_SAMPLER_SHIFT 4 -#define NV50_COMPUTE_BIND_TSC_SAMPLER_MASK 0x000000f0 -#define NV50_COMPUTE_BIND_TSC_TSC_SHIFT 12 -#define NV50_COMPUTE_BIND_TSC_TSC_MASK 0x001ff000 -#define NV50_COMPUTE_BIND_TIC 0x000003c4 -#define NV50_COMPUTE_BIND_TIC_VALID (1 << 0) -#define NV50_COMPUTE_BIND_TIC_TEXTURE_SHIFT 1 -#define NV50_COMPUTE_BIND_TIC_TEXTURE_MASK 0x000001fe -#define NV50_COMPUTE_BIND_TIC_TIC_SHIFT 9 -#define NV50_COMPUTE_BIND_TIC_TIC_MASK 0x7ffffe00 -#define NV50_COMPUTE_SET_PROGRAM_CB 0x000003c8 -#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX_SHIFT 8 -#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX_MASK 
0x00000f00 -#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER_SHIFT 12 -#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER_MASK 0x0007f000 -#define NV50_COMPUTE_SET_PROGRAM_CB_VALID (1 << 0) -#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(x) (0x00000400+((x)*32)) -#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH__SIZE 0x00000010 -#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(x) (0x00000404+((x)*32)) -#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW__SIZE 0x00000010 -#define NV50_COMPUTE_GLOBAL_PITCH(x) (0x00000408+((x)*32)) -#define NV50_COMPUTE_GLOBAL_PITCH__SIZE 0x00000010 -#define NV50_COMPUTE_GLOBAL_LIMIT(x) (0x0000040c+((x)*32)) -#define NV50_COMPUTE_GLOBAL_LIMIT__SIZE 0x00000010 -#define NV50_COMPUTE_GLOBAL_MODE(x) (0x00000410+((x)*32)) -#define NV50_COMPUTE_GLOBAL_MODE__SIZE 0x00000010 -#define NV50_COMPUTE_GLOBAL_MODE_LINEAR (1 << 0) -#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE_SHIFT 8 -#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE_MASK 0x00000f00 -#define NV50_COMPUTE_USER_PARAM(x) (0x00000600+((x)*4)) -#define NV50_COMPUTE_USER_PARAM__SIZE 0x00000040 - - -#endif /* NOUVEAU_REG_H */ diff --git a/src/gallium/drivers/nv50/nv50_resource.c b/src/gallium/drivers/nv50/nv50_resource.c index 6c0a969635..1ae4d70a84 100644 --- a/src/gallium/drivers/nv50/nv50_resource.c +++ b/src/gallium/drivers/nv50/nv50_resource.c @@ -4,64 +4,48 @@ #include "nouveau/nouveau_screen.h" -/* This doesn't look quite right - this query is supposed to ask - * whether the particular context has references to the resource in - * any unflushed rendering command buffer, and hence requires a - * pipe->flush() for serializing some modification to that resource. - * - * This seems to be answering the question of whether the resource is - * currently on hardware. 
- */ -static unsigned int -nv50_resource_is_referenced(struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, int layer) -{ - return nouveau_reference_flags(nv50_resource(resource)->bo); -} - static struct pipe_resource * nv50_resource_create(struct pipe_screen *screen, - const struct pipe_resource *template) + const struct pipe_resource *templ) { - if (template->target == PIPE_BUFFER) - return nv50_buffer_create(screen, template); - else - return nv50_miptree_create(screen, template); + switch (templ->target) { + case PIPE_BUFFER: + return nouveau_buffer_create(screen, templ); + default: + return nv50_miptree_create(screen, templ); + } } static struct pipe_resource * nv50_resource_from_handle(struct pipe_screen * screen, - const struct pipe_resource *template, - struct winsys_handle *whandle) + const struct pipe_resource *templ, + struct winsys_handle *whandle) { - if (template->target == PIPE_BUFFER) - return NULL; - else - return nv50_miptree_from_handle(screen, template, whandle); + if (templ->target == PIPE_BUFFER) + return NULL; + else + return nv50_miptree_from_handle(screen, templ, whandle); } void nv50_init_resource_functions(struct pipe_context *pcontext) { - pcontext->get_transfer = u_get_transfer_vtbl; - pcontext->transfer_map = u_transfer_map_vtbl; - pcontext->transfer_flush_region = u_transfer_flush_region_vtbl; - pcontext->transfer_unmap = u_transfer_unmap_vtbl; - pcontext->transfer_destroy = u_transfer_destroy_vtbl; - pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; - pcontext->is_resource_referenced = nv50_resource_is_referenced; - - pcontext->create_surface = nv50_miptree_surface_new; - pcontext->surface_destroy = nv50_miptree_surface_del; + pcontext->get_transfer = u_get_transfer_vtbl; + pcontext->transfer_map = u_transfer_map_vtbl; + pcontext->transfer_flush_region = u_transfer_flush_region_vtbl; + pcontext->transfer_unmap = u_transfer_unmap_vtbl; + pcontext->transfer_destroy = u_transfer_destroy_vtbl; + 
pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; + pcontext->create_surface = nv50_miptree_surface_new; + pcontext->surface_destroy = nv50_miptree_surface_del; } void nv50_screen_init_resource_functions(struct pipe_screen *pscreen) { - pscreen->resource_create = nv50_resource_create; - pscreen->resource_from_handle = nv50_resource_from_handle; - pscreen->resource_get_handle = u_resource_get_handle_vtbl; - pscreen->resource_destroy = u_resource_destroy_vtbl; - pscreen->user_buffer_create = nv50_user_buffer_create; + pscreen->resource_create = nv50_resource_create; + pscreen->resource_from_handle = nv50_resource_from_handle; + pscreen->resource_get_handle = u_resource_get_handle_vtbl; + pscreen->resource_destroy = u_resource_destroy_vtbl; + pscreen->user_buffer_create = nouveau_user_buffer_create; } diff --git a/src/gallium/drivers/nv50/nv50_resource.h b/src/gallium/drivers/nv50/nv50_resource.h index 4b2a75e11a..0e9f0a2557 100644 --- a/src/gallium/drivers/nv50/nv50_resource.h +++ b/src/gallium/drivers/nv50/nv50_resource.h @@ -1,97 +1,70 @@ -#ifndef NV50_RESOURCE_H -#define NV50_RESOURCE_H +#ifndef __NV50_RESOURCE_H__ +#define __NV50_RESOURCE_H__ #include "util/u_transfer.h" - +#include "util/u_double_list.h" +#define NOUVEAU_NVC0 #include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_buffer.h" +#undef NOUVEAU_NVC0 + +void +nv50_init_resource_functions(struct pipe_context *pcontext); -struct pipe_resource; -struct nouveau_bo; +void +nv50_screen_init_resource_functions(struct pipe_screen *pscreen); +#define NV50_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf) -/* This gets further specialized into either buffer or texture - * structures. In the future we'll want to remove much of that - * distinction, but for now try to keep as close to the existing code - * as possible and use the vtbl struct to choose between the two - * underlying implementations. 
- */ -struct nv50_resource { - struct pipe_resource base; - const struct u_resource_vtbl *vtbl; - struct nouveau_bo *bo; -}; +#define NV50_TILE_PITCH(m) (64 << 0) +#define NV50_TILE_HEIGHT(m) ( 4 << NV50_TILE_DIM_SHIFT(m, 0)) +#define NV50_TILE_DEPTH(m) ( 1 << NV50_TILE_DIM_SHIFT(m, 1)) + +#define NV50_TILE_SIZE_2D(m) ((64 * 4) << \ + NV50_TILE_DIM_SHIFT(m, 0)) + +#define NV50_TILE_SIZE(m) (NV50_TILE_SIZE_2D(m) << NV50_TILE_DIM_SHIFT(m, 1)) struct nv50_miptree_level { - int *image_offset; - unsigned pitch; - unsigned tile_mode; + uint32_t offset; + uint32_t pitch; + uint32_t tile_mode; }; #define NV50_MAX_TEXTURE_LEVELS 16 struct nv50_miptree { - struct nv50_resource base; - - struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS]; - int image_nr; - int total_size; + struct nv04_resource base; + struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS]; + uint32_t total_size; + uint32_t layer_stride; + boolean layout_3d; /* TRUE if layer count varies with mip level */ }; static INLINE struct nv50_miptree * nv50_miptree(struct pipe_resource *pt) { - return (struct nv50_miptree *)pt; -} - - -static INLINE -struct nv50_resource *nv50_resource(struct pipe_resource *resource) -{ - return (struct nv50_resource *)resource; + return (struct nv50_miptree *)pt; } -/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? 
*/ -static INLINE boolean -nv50_resource_mapped_by_gpu(struct pipe_resource *resource) -{ - return nv50_resource(resource)->bo->handle; -} - -void -nv50_init_resource_functions(struct pipe_context *pcontext); - -void -nv50_screen_init_resource_functions(struct pipe_screen *pscreen); - -/* Internal functions +/* Internal functions: */ struct pipe_resource * nv50_miptree_create(struct pipe_screen *pscreen, - const struct pipe_resource *tmp); + const struct pipe_resource *tmp); struct pipe_resource * nv50_miptree_from_handle(struct pipe_screen *pscreen, - const struct pipe_resource *template, - struct winsys_handle *whandle); - -struct pipe_resource * -nv50_buffer_create(struct pipe_screen *pscreen, - const struct pipe_resource *template); - -struct pipe_resource * -nv50_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, - unsigned usage); - + const struct pipe_resource *template, + struct winsys_handle *whandle); struct pipe_surface * -nv50_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt, - const struct pipe_surface *surf_tmpl); +nv50_miptree_surface_new(struct pipe_context *, + struct pipe_resource *, + const struct pipe_surface *templ); void -nv50_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps); - +nv50_miptree_surface_del(struct pipe_context *, struct pipe_surface *); #endif diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index edc3d54d01..7690c80eef 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Ben Skeggs + * Copyright 2010 Christoph Bumiller * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,596 +25,625 @@ #include "nv50_context.h" #include "nv50_screen.h" -#include "nv50_resource.h" -#include "nv50_program.h" -#include "nouveau/nouveau_stateobj.h" 
+#include "nouveau/nv_object.xml.h" + +#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS +# define NOUVEAU_GETPARAM_GRAPH_UNITS 13 +#endif + +extern int nouveau_device_get_param(struct nouveau_device *dev, + uint64_t param, uint64_t *value); static boolean nv50_screen_is_format_supported(struct pipe_screen *pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned usage, unsigned geom_flags) + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bindings) { - if (sample_count > 1) - return FALSE; - - if (!util_format_s3tc_enabled) { - switch (format) { - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return FALSE; - default: - break; - } - } - - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - if ((nouveau_screen(pscreen)->device->chipset & 0xf0) != 0xa0) - return FALSE; - break; - default: - break; - } - - /* transfers & shared are always supported */ - usage &= ~(PIPE_BIND_TRANSFER_READ | - PIPE_BIND_TRANSFER_WRITE | - PIPE_BIND_SHARED); - - return (nv50_format_table[format].usage & usage) == usage; + if (sample_count > 1) + return FALSE; + + if (!util_format_s3tc_enabled) { + switch (format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return FALSE; + default: + break; + } + } + + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + if (nv50_screen(pscreen)->tesla->grclass < NVA0_3D) + return FALSE; + break; + default: + break; + } + + /* transfers & shared are always supported */ + bindings &= ~(PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_SHARED); + + return (nv50_format_table[format].usage & bindings) == bindings; } static int nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 32; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - 
return 32; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 64; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - case PIPE_CAP_SM3: - return 1; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 8; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TIMER_QUERY: - return 0; - case PIPE_CAP_STREAM_OUTPUT: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - return 1; - case PIPE_CAP_TEXTURE_SWIZZLE: - return 1; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 1; - case PIPE_CAP_INDEP_BLEND_ENABLE: - return 1; - case PIPE_CAP_INDEP_BLEND_FUNC: - return 0; - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - case PIPE_CAP_DEPTH_CLAMP: - return 1; - case PIPE_CAP_SHADER_STENCIL_EXPORT: - return 0; - case PIPE_CAP_PRIMITIVE_RESTART: - return 0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 32; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 64; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_ARRAY_TEXTURES: /* shader support missing */ + return 0; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + case PIPE_CAP_TEXTURE_SWIZZLE: + case 
PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_GLSL: + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_TIMER_QUERY: + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_STREAM_OUTPUT: + return 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 1; + case PIPE_CAP_INDEP_BLEND_FUNC: + return nv50_screen(pscreen)->tesla->grclass >= NVA3_3D; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + return 1; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0; + } } static int nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, - enum pipe_shader_cap param) + enum pipe_shader_cap param) { - switch(shader) { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_VERTEX: - break; - case PIPE_SHADER_GEOMETRY: - default: - return 0; - } - - switch(param) { - case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: /* arbitrary limit */ - return 16384; - case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: /* need stack bo */ - return 4; - case PIPE_SHADER_CAP_MAX_INPUTS: /* 128 / 4 with GP */ - if (shader == PIPE_SHADER_GEOMETRY) - return 128 / 4; - else - return 64 / 4; - case PIPE_SHADER_CAP_MAX_CONSTS: - return 65536 / 16; - case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: /* 16 - 1, but 
not implemented */ - return 1; - case PIPE_SHADER_CAP_MAX_ADDRS: /* no spilling atm */ - return 1; - case PIPE_SHADER_CAP_MAX_PREDS: /* not yet handled */ - return 0; - case PIPE_SHADER_CAP_MAX_TEMPS: /* no spilling atm */ - return NV50_CAP_MAX_PROGRAM_TEMPS; - case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 1; - case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: - case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; - case PIPE_SHADER_CAP_SUBROUTINES: - return 0; - default: - return 0; - } + switch (shader) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_GEOMETRY: + case PIPE_SHADER_FRAGMENT: + break; + default: + return 0; + } + + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 4; + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_VERTEX) + return 32; + return 0x300 / 16; + case PIPE_SHADER_CAP_MAX_CONSTS: + return 65536 / 16; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 14; + case PIPE_SHADER_CAP_MAX_ADDRS: + return 1; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + return shader != PIPE_SHADER_FRAGMENT; + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; + case PIPE_SHADER_CAP_MAX_TEMPS: + return NV50_CAP_MAX_PROGRAM_TEMPS; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; /* please inline, or provide function declarations */ + default: + NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); + return 0; + } } static float nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param) { - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case 
PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0f; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0f; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0f; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0.0f; + } } static void nv50_screen_destroy(struct pipe_screen *pscreen) { - struct nv50_screen *screen = nv50_screen(pscreen); - unsigned i; - - for (i = 0; i < 3; i++) { - if (screen->constbuf_parm[i]) - nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); - } - - if (screen->constbuf_misc[0]) - nouveau_bo_ref(NULL, &screen->constbuf_misc[0]); - if (screen->tic) - nouveau_bo_ref(NULL, &screen->tic); - if (screen->tsc) - nouveau_bo_ref(NULL, &screen->tsc); - - nouveau_notifier_free(&screen->sync); - nouveau_grobj_free(&screen->tesla); - nouveau_grobj_free(&screen->eng2d); - nouveau_grobj_free(&screen->m2mf); - nouveau_resource_destroy(&screen->immd_heap); - nouveau_screen_fini(&screen->base); - FREE(screen); -} + struct nv50_screen *screen = nv50_screen(pscreen); -#define BGN_RELOC(ch, bo, gr, m, n, fl) \ - OUT_RELOC(ch, bo, (n << 18) | (gr->subc << 13) | m, fl, 0, 0) + if (screen->base.fence.current) { + nouveau_fence_wait(screen->base.fence.current); + nouveau_fence_ref (NULL, &screen->base.fence.current); + } -void -nv50_screen_reloc_constbuf(struct nv50_screen *screen, unsigned cbi) + nouveau_bo_ref(NULL, &screen->code); + nouveau_bo_ref(NULL, &screen->tls_bo); + nouveau_bo_ref(NULL, &screen->stack_bo); + nouveau_bo_ref(NULL, &screen->txc); + nouveau_bo_ref(NULL, &screen->uniforms); + 
nouveau_bo_ref(NULL, &screen->fence.bo); + + nouveau_resource_destroy(&screen->vp_code_heap); + nouveau_resource_destroy(&screen->gp_code_heap); + nouveau_resource_destroy(&screen->fp_code_heap); + + if (screen->tic.entries) + FREE(screen->tic.entries); + + nouveau_mm_destroy(screen->mm_VRAM_fe0); + + nouveau_grobj_free(&screen->tesla); + nouveau_grobj_free(&screen->eng2d); + nouveau_grobj_free(&screen->m2mf); + + nouveau_notifier_free(&screen->sync); + + nouveau_screen_fini(&screen->base); + + FREE(screen); +} + +static void +nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 sequence) { - struct nouveau_bo *bo; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *tesla = screen->tesla; - unsigned size; - const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - - switch (cbi) { - case NV50_CB_PMISC: - bo = screen->constbuf_misc[0]; - size = 0x200; - break; - case NV50_CB_PVP: - case NV50_CB_PFP: - case NV50_CB_PGP: - bo = screen->constbuf_parm[cbi - NV50_CB_PVP]; - size = 0; - break; - default: - return; - } - - BGN_RELOC (chan, bo, tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); - OUT_RELOCh(chan, bo, 0, rl); - OUT_RELOCl(chan, bo, 0, rl); - OUT_RELOC (chan, bo, (cbi << 16) | size, rl, 0, 0); + struct nv50_screen *screen = nv50_screen(pscreen); + struct nouveau_channel *chan = screen->base.channel; + + MARK_RING (chan, 5, 2); + BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); + OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); + OUT_RING (chan, sequence); + OUT_RING (chan, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 | + NV50_3D_QUERY_GET_UNK4 | + NV50_3D_QUERY_GET_UNIT_CROP | + NV50_3D_QUERY_GET_TYPE_QUERY | + NV50_3D_QUERY_GET_QUERY_SELECT_ZERO | + NV50_3D_QUERY_GET_SHORT); } -void -nv50_screen_relocs(struct nv50_screen *screen) +static u32 +nv50_screen_fence_update(struct pipe_screen *pscreen) { - struct nouveau_channel *chan = screen->base.channel; - struct 
nouveau_grobj *tesla = screen->tesla; - unsigned i; - const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; + struct nv50_screen *screen = nv50_screen(pscreen); + return screen->fence.map[0]; +} - MARK_RING (chan, 28, 26); +#define FAIL_SCREEN_INIT(str, err) \ + do { \ + NOUVEAU_ERR(str, err); \ + nv50_screen_destroy(pscreen); \ + return NULL; \ + } while(0) - /* cause grobj autobind */ - BEGIN_RING(chan, tesla, 0x0100, 1); - OUT_RING (chan, 0); +struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) +{ + struct nv50_screen *screen; + struct nouveau_channel *chan; + struct pipe_screen *pscreen; + uint64_t value; + uint32_t tesla_class; + unsigned stack_size, max_warps, tls_space; + int ret; + unsigned i, base; + + screen = CALLOC_STRUCT(nv50_screen); + if (!screen) + return NULL; + pscreen = &screen->base.base; + + screen->base.sysmem_bindings = PIPE_BIND_CONSTANT_BUFFER; + + ret = nouveau_screen_init(&screen->base, dev); + if (ret) + FAIL_SCREEN_INIT("nouveau_screen_init failed: %d\n", ret); + + chan = screen->base.channel; + + pscreen->winsys = ws; + pscreen->destroy = nv50_screen_destroy; + pscreen->context_create = nv50_create; + pscreen->is_format_supported = nv50_screen_is_format_supported; + pscreen->get_param = nv50_screen_get_param; + pscreen->get_shader_param = nv50_screen_get_shader_param; + pscreen->get_paramf = nv50_screen_get_paramf; + + nv50_screen_init_resource_functions(pscreen); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, + &screen->fence.bo); + if (ret) + goto fail; + nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR); + screen->fence.map = screen->fence.bo->map; + nouveau_bo_unmap(screen->fence.bo); + screen->base.fence.emit = nv50_screen_fence_emit; + screen->base.fence.update = nv50_screen_fence_update; + + ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); + if (ret) + FAIL_SCREEN_INIT("Error allocating notifier: %d\n", ret); + + ret = 
nouveau_grobj_alloc(chan, 0xbeef5039, NV50_M2MF, &screen->m2mf); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); + + BIND_RING (chan, screen->m2mf, NV50_SUBCH_MF); + BEGIN_RING(chan, RING_MF_(NV04_M2MF_DMA_NOTIFY), 3); + OUT_RING (chan, screen->sync->handle); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); + + ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret); + + BIND_RING (chan, screen->eng2d, NV50_SUBCH_2D); + BEGIN_RING(chan, RING_2D(DMA_NOTIFY), 4); + OUT_RING (chan, screen->sync->handle); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); + BEGIN_RING(chan, RING_2D(OPERATION), 1); + OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY); + BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_2D(COLOR_KEY_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_2D_(0x0888), 1); + OUT_RING (chan, 1); + + switch (dev->chipset & 0xf0) { + case 0x50: + tesla_class = NV50_3D; + break; + case 0x80: + case 0x90: + tesla_class = NV84_3D; + break; + case 0xa0: + switch (dev->chipset) { + case 0xa0: + case 0xaa: + case 0xac: + tesla_class = NVA0_3D; + break; + case 0xaf: + tesla_class = NVAF_3D; + break; + default: + tesla_class = NVA3_3D; + break; + } + break; + default: + FAIL_SCREEN_INIT("Not a known NV50 chipset: NV%02x\n", dev->chipset); + break; + } + + ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class, &screen->tesla); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); + + BIND_RING (chan, screen->tesla, NV50_SUBCH_3D); + + BEGIN_RING(chan, RING_3D(COND_MODE), 1); + OUT_RING (chan, NV50_3D_COND_MODE_ALWAYS); + + BEGIN_RING(chan, RING_3D(DMA_NOTIFY), 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, RING_3D(DMA_ZETA), 11); + for (i = 0; i < 11; ++i) + 
OUT_RING(chan, chan->vram->handle); + BEGIN_RING(chan, RING_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN); + for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i) + OUT_RING(chan, chan->vram->handle); + + BEGIN_RING(chan, RING_3D(REG_MODE), 1); + OUT_RING (chan, NV50_3D_REG_MODE_STRIPED); + BEGIN_RING(chan, RING_3D(UNK1400_LANES), 1); + OUT_RING (chan, 0xf); + + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); + OUT_RING (chan, 1); + + BEGIN_RING(chan, RING_3D(CSAA_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1); + OUT_RING (chan, NV50_3D_MULTISAMPLE_MODE_MS1); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(ZCULL_REGION), 1); /* deactivate ZCULL */ + OUT_RING (chan, 0x3f); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, + 3 << NV50_CODE_BO_SIZE_LOG2, &screen->code); + if (ret) + goto fail; + + nouveau_resource_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + nouveau_resource_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + nouveau_resource_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + + base = 1 << NV50_CODE_BO_SIZE_LOG2; + + BEGIN_RING(chan, RING_3D(VP_ADDRESS_HIGH), 2); + OUT_RELOCh(chan, screen->code, base * 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->code, base * 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + + BEGIN_RING(chan, RING_3D(FP_ADDRESS_HIGH), 2); + OUT_RELOCh(chan, screen->code, base * 1, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->code, base * 1, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + + BEGIN_RING(chan, RING_3D(GP_ADDRESS_HIGH), 2); + OUT_RELOCh(chan, screen->code, base * 2, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->code, base * 2, NOUVEAU_BO_VRAM | 
NOUVEAU_BO_RD); + + nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); + + max_warps = util_bitcount(value & 0xffff); + max_warps *= util_bitcount((value >> 24) & 0xf) * 32; + + stack_size = max_warps * 64 * 8; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, + &screen->stack_bo); + if (ret) + FAIL_SCREEN_INIT("Failed to allocate stack bo: %d\n", ret); + + BEGIN_RING(chan, RING_3D(STACK_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, 4); + + tls_space = NV50_CAP_MAX_PROGRAM_TEMPS * 16; + + screen->tls_size = tls_space * max_warps * 32; + + debug_printf("max_warps = %i, tls_size = %lu KiB\n", + max_warps, screen->tls_size >> 10); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, screen->tls_size, + &screen->tls_bo); + if (ret) + FAIL_SCREEN_INIT("Failed to allocate stack bo: %d\n", ret); + + BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->tls_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->tls_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, util_unsigned_logbase2(tls_space / 8)); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, + &screen->uniforms); + if (ret) + goto fail; + + BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->uniforms, 0 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->uniforms, 0 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, (NV50_CB_PVP << 16) | 0x0000); + + BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->uniforms, 1 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->uniforms, 1 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, (NV50_CB_PGP << 16) | 0x0000); + + BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->uniforms, 2 << 16, NOUVEAU_BO_VRAM | 
NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->uniforms, 2 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, (NV50_CB_PFP << 16) | 0x0000); + + BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->uniforms, 3 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->uniforms, 3 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, (NV50_CB_AUX << 16) | 0x0200); + + BEGIN_RING_NI(chan, RING_3D(SET_PROGRAM_CB), 6); + OUT_RING (chan, (NV50_CB_PVP << 12) | 0x001); + OUT_RING (chan, (NV50_CB_PGP << 12) | 0x021); + OUT_RING (chan, (NV50_CB_PFP << 12) | 0x031); + OUT_RING (chan, (NV50_CB_AUX << 12) | 0xf01); + OUT_RING (chan, (NV50_CB_AUX << 12) | 0xf21); + OUT_RING (chan, (NV50_CB_AUX << 12) | 0xf31); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, + &screen->txc); + if (ret) + FAIL_SCREEN_INIT("Could not allocate TIC/TSC bo: %d\n", ret); + + /* max TIC (bits 4:8) & TSC bindings, per program type */ + for (i = 0; i < 3; ++i) { + BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1); + OUT_RING (chan, 0x54); + } + + BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, NV50_TIC_MAX_ENTRIES - 1); + + BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, NV50_TSC_MAX_ENTRIES - 1); + + BEGIN_RING(chan, RING_3D(LINKED_TSC), 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, RING_3D(CLIP_RECTS_EN), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(CLIP_RECTS_MODE), 1); + OUT_RING (chan, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 8 * 2); + for (i = 0; i < 8 * 2; ++i) + OUT_RING(chan, 0); + BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, 
RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 1.0f); + + BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); +#ifdef NV50_SCISSORS_CLIPPING + OUT_RING (chan, 0x0000); +#else + OUT_RING (chan, 0x1080); +#endif + + BEGIN_RING(chan, RING_3D(CLEAR_FLAGS), 1); + OUT_RING (chan, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT); - BGN_RELOC (chan, screen->tic, tesla, NV50TCL_TIC_ADDRESS_HIGH, 2, rl); - OUT_RELOCh(chan, screen->tic, 0, rl); - OUT_RELOCl(chan, screen->tic, 0, rl); + /* We use scissors instead of exact view volume clipping, + * so they're always enabled. + */ + BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3); + OUT_RING (chan, 1); + OUT_RING (chan, 8192 << 16); + OUT_RING (chan, 8192 << 16); - BGN_RELOC (chan, screen->tsc, tesla, NV50TCL_TSC_ADDRESS_HIGH, 2, rl); - OUT_RELOCh(chan, screen->tsc, 0, rl); - OUT_RELOCl(chan, screen->tsc, 0, rl); + BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(POINT_RASTER_RULES), 1); + OUT_RING (chan, NV50_3D_POINT_RASTER_RULES_OGL); + BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1); + OUT_RING (chan, 0x11111111); + BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1); + OUT_RING (chan, 1); - nv50_screen_reloc_constbuf(screen, NV50_CB_PMISC); + FIRE_RING (chan); - BGN_RELOC (chan, screen->constbuf_misc[0], - tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); - OUT_RELOCh(chan, screen->constbuf_misc[0], 0x200, rl); - OUT_RELOCl(chan, screen->constbuf_misc[0], 0x200, rl); - OUT_RELOC (chan, screen->constbuf_misc[0], - (NV50_CB_AUX << 16) | 0x0200, rl, 0, 0); + screen->tic.entries = CALLOC(4096, sizeof(void *)); + screen->tsc.entries = screen->tic.entries + 2048; - for (i = 0; i < 3; ++i) - nv50_screen_reloc_constbuf(screen, NV50_CB_PVP + i); + screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); - BGN_RELOC (chan, screen->stack_bo, - tesla, NV50TCL_STACK_ADDRESS_HIGH, 2, 
rl); - OUT_RELOCh(chan, screen->stack_bo, 0, rl); - OUT_RELOCl(chan, screen->stack_bo, 0, rl); + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); - if (!screen->cur_ctx->req_lmem) - return; + return pscreen; - BGN_RELOC (chan, screen->local_bo, - tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 2, rl); - OUT_RELOCh(chan, screen->local_bo, 0, rl); - OUT_RELOCl(chan, screen->local_bo, 0, rl); +fail: + nv50_screen_destroy(pscreen); + return NULL; } -#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS -# define NOUVEAU_GETPARAM_GRAPH_UNITS 13 -#endif +void +nv50_screen_make_buffers_resident(struct nv50_screen *screen) +{ + struct nouveau_channel *chan = screen->base.channel; -extern int nouveau_device_get_param(struct nouveau_device *dev, - uint64_t param, uint64_t *value); + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; -struct pipe_screen * -nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) + MARK_RING(chan, 5, 5); + nouveau_bo_validate(chan, screen->code, flags); + nouveau_bo_validate(chan, screen->uniforms, flags); + nouveau_bo_validate(chan, screen->txc, flags); + nouveau_bo_validate(chan, screen->tls_bo, flags); + nouveau_bo_validate(chan, screen->stack_bo, flags); +} + +int +nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry) { - struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); - struct nouveau_channel *chan; - struct pipe_screen *pscreen; - uint64_t value; - unsigned chipset = dev->chipset; - unsigned tesla_class = 0; - unsigned stack_size, local_size, max_warps; - int ret, i; - const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; - - if (!screen) - return NULL; - pscreen = &screen->base.base; - - ret = nouveau_screen_init(&screen->base, dev); - if (ret) { - nv50_screen_destroy(pscreen); - return NULL; - } - chan = screen->base.channel; - - pscreen->winsys = ws; - pscreen->destroy = nv50_screen_destroy; - pscreen->get_param = nv50_screen_get_param; - pscreen->get_shader_param = nv50_screen_get_shader_param; - 
pscreen->get_paramf = nv50_screen_get_paramf; - pscreen->is_format_supported = nv50_screen_is_format_supported; - pscreen->context_create = nv50_create; - - nv50_screen_init_resource_functions(pscreen); - - /* DMA engine object */ - ret = nouveau_grobj_alloc(chan, 0xbeef5039, - NV50_MEMORY_TO_MEMORY_FORMAT, &screen->m2mf); - if (ret) { - NOUVEAU_ERR("Error creating M2MF object: %d\n", ret); - nv50_screen_destroy(pscreen); - return NULL; - } - - /* 2D object */ - ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d); - if (ret) { - NOUVEAU_ERR("Error creating 2D object: %d\n", ret); - nv50_screen_destroy(pscreen); - return NULL; - } - - /* 3D object */ - switch (chipset & 0xf0) { - case 0x50: - tesla_class = NV50TCL; - break; - case 0x80: - case 0x90: - tesla_class = NV84TCL; - break; - case 0xa0: - switch (chipset) { - case 0xa0: - case 0xaa: - case 0xac: - tesla_class = NVA0TCL; - break; - default: - tesla_class = NVA8TCL; - break; - } - break; - default: - NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset); - nv50_screen_destroy(pscreen); - return NULL; - } - - ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class, - &screen->tesla); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - nv50_screen_destroy(pscreen); - return NULL; - } - - /* this is necessary for the new RING_3D / statebuffer code */ - BIND_RING(chan, screen->tesla, 7); - - /* Sync notifier */ - ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv50_screen_destroy(pscreen); - return NULL; - } - - /* Static M2MF init */ - BEGIN_RING(chan, screen->m2mf, - NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3); - OUT_RING (chan, screen->sync->handle); - OUT_RING (chan, chan->vram->handle); - OUT_RING (chan, chan->vram->handle); - - /* Static 2D init */ - BEGIN_RING(chan, screen->eng2d, NV50_2D_DMA_NOTIFY, 4); - OUT_RING (chan, screen->sync->handle); - OUT_RING (chan, 
chan->vram->handle); - OUT_RING (chan, chan->vram->handle); - OUT_RING (chan, chan->vram->handle); - BEGIN_RING(chan, screen->eng2d, NV50_2D_OPERATION, 1); - OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY); - BEGIN_RING(chan, screen->eng2d, NV50_2D_CLIP_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, screen->eng2d, 0x0888, 1); - OUT_RING (chan, 1); - - /* Static tesla init */ - BEGIN_RING(chan, screen->tesla, NV50TCL_COND_MODE, 1); - OUT_RING (chan, NV50TCL_COND_MODE_ALWAYS); - BEGIN_RING(chan, screen->tesla, NV50TCL_DMA_NOTIFY, 1); - OUT_RING (chan, screen->sync->handle); - BEGIN_RING(chan, screen->tesla, NV50TCL_DMA_ZETA, 11); - for (i = 0; i < 11; i++) - OUT_RING (chan, chan->vram->handle); - BEGIN_RING(chan, screen->tesla, - NV50TCL_DMA_COLOR(0), NV50TCL_DMA_COLOR__SIZE); - for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++) - OUT_RING (chan, chan->vram->handle); - - BEGIN_RING(chan, screen->tesla, NV50TCL_RT_CONTROL, 1); - OUT_RING (chan, 1); - - /* activate all 32 lanes (threads) in a warp */ - BEGIN_RING(chan, screen->tesla, NV50TCL_REG_MODE, 1); - OUT_RING (chan, NV50TCL_REG_MODE_STRIPED); - BEGIN_RING(chan, screen->tesla, 0x1400, 1); - OUT_RING (chan, 0xf); - - /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - for (i = 0; i < 3; ++i) { - BEGIN_RING(chan, screen->tesla, NV50TCL_TEX_LIMITS(i), 1); - OUT_RING (chan, 0x54); - } - - /* origin is top left (set to 1 for bottom left) */ - BEGIN_RING(chan, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); - OUT_RING (chan, 8); - - BEGIN_RING(chan, screen->tesla, NV50TCL_CLEAR_FLAGS, 1); - OUT_RING (chan, NV50TCL_CLEAR_FLAGS_D3D); - - /* constant buffers for immediates and VP/FP parameters */ - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4, - &screen->constbuf_misc[0]); - if (ret) { - nv50_screen_destroy(pscreen); - return NULL; - } - BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - 
OUT_RELOCh(chan, screen->constbuf_misc[0], 0, rl); - OUT_RELOCl(chan, screen->constbuf_misc[0], 0, rl); - OUT_RING (chan, (NV50_CB_PMISC << 16) | 0x0200); - BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - OUT_RELOCh(chan, screen->constbuf_misc[0], 0x200, rl); - OUT_RELOCl(chan, screen->constbuf_misc[0], 0x200, rl); - OUT_RING (chan, (NV50_CB_AUX << 16) | 0x0200); - - for (i = 0; i < 3; i++) { - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (4096 * 4) * 4, - &screen->constbuf_parm[i]); - if (ret) { - nv50_screen_destroy(pscreen); - return NULL; - } - BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl); - OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl); - /* CB_DEF_SET_SIZE value of 0x0000 means 65536 */ - OUT_RING (chan, ((NV50_CB_PVP + i) << 16) | 0x0000); - } - - if (nouveau_resource_init(&screen->immd_heap, 0, 128)) { - NOUVEAU_ERR("Error initialising shader immediates heap.\n"); - nv50_screen_destroy(pscreen); - return NULL; - } - - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 3 * 32 * (8 * 4), - &screen->tic); - if (ret) { - nv50_screen_destroy(pscreen); - return NULL; - } - BEGIN_RING(chan, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3); - OUT_RELOCh(chan, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCl(chan, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RING (chan, 3 * 32 - 1); - - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 3 * 32 * (8 * 4), - &screen->tsc); - if (ret) { - nv50_screen_destroy(pscreen); - return NULL; - } - BEGIN_RING(chan, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3); - OUT_RELOCh(chan, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCl(chan, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RING (chan, 0); /* ignored if TSC_LINKED (0x1234) == 1 */ - - /* map constant buffers: - * B = buffer ID (maybe more than 1 byte) - * N = CB index used in shader instruction - * P = program type (0 = VP, 2 = GP, 3 = FP) - * 
SET_PROGRAM_CB = 0x000BBNP1 - */ - BEGIN_RING_NI(chan, screen->tesla, NV50TCL_SET_PROGRAM_CB, 8); - /* bind immediate buffer */ - OUT_RING (chan, 0x001 | (NV50_CB_PMISC << 12)); - OUT_RING (chan, 0x021 | (NV50_CB_PMISC << 12)); - OUT_RING (chan, 0x031 | (NV50_CB_PMISC << 12)); - /* bind auxiliary constbuf to immediate data bo */ - OUT_RING (chan, 0x201 | (NV50_CB_AUX << 12)); - OUT_RING (chan, 0x221 | (NV50_CB_AUX << 12)); - /* bind parameter buffers */ - OUT_RING (chan, 0x101 | (NV50_CB_PVP << 12)); - OUT_RING (chan, 0x121 | (NV50_CB_PGP << 12)); - OUT_RING (chan, 0x131 | (NV50_CB_PFP << 12)); - - /* shader stack */ - nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); - - max_warps = util_bitcount(value & 0xffff); - max_warps *= util_bitcount((value >> 24) & 0xf) * 32; - - stack_size = max_warps * 64 * 8; - - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, - stack_size, &screen->stack_bo); - if (ret) { - nv50_screen_destroy(pscreen); - return NULL; - } - BEGIN_RING(chan, screen->tesla, NV50TCL_STACK_ADDRESS_HIGH, 3); - OUT_RELOCh(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RING (chan, 4); - - local_size = (NV50_CAP_MAX_PROGRAM_TEMPS * 16) * max_warps * 32; - - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, - local_size, &screen->local_bo); - if (ret) { - nv50_screen_destroy(pscreen); - return NULL; - } - - local_size = NV50_CAP_MAX_PROGRAM_TEMPS * 16; - - BEGIN_RING(chan, screen->tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 3); - OUT_RELOCh(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RING (chan, util_unsigned_logbase2(local_size / 8)); - - /* Vertex array limits - max them out */ - for (i = 0; i < 16; i++) { - BEGIN_RING(chan, screen->tesla, - NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); - OUT_RING (chan, 0x000000ff); - OUT_RING (chan, 0xffffffff); - } - - 
BEGIN_RING(chan, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2); - OUT_RINGf (chan, 0.0f); - OUT_RINGf (chan, 1.0f); - - BEGIN_RING(chan, screen->tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); - OUT_RING (chan, 1); - - /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */ - BEGIN_RING(chan, screen->tesla, NV50TCL_LINKED_TSC, 1); - OUT_RING (chan, 1); - - BEGIN_RING(chan, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (chan, 1); /* default edgeflag to TRUE */ - - FIRE_RING (chan); - - screen->force_push = debug_get_bool_option("NV50_ALWAYS_PUSH", FALSE); - if(!screen->force_push) - screen->base.vertex_buffer_flags = screen->base.index_buffer_flags = NOUVEAU_BO_GART; - return pscreen; + int i = screen->tic.next; + + while (screen->tic.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1); + + screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1); + + if (screen->tic.entries[i]) + nv50_tic_entry(screen->tic.entries[i])->id = -1; + + screen->tic.entries[i] = entry; + return i; } +int +nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry) +{ + int i = screen->tsc.next; + + while (screen->tsc.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1); + + screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1); + + if (screen->tsc.entries[i]) + nv50_tsc_entry(screen->tsc.entries[i])->id = -1; + + screen->tsc.entries[i] = entry; + return i; +} diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 6e15230b48..aea434b867 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -1,53 +1,148 @@ #ifndef __NV50_SCREEN_H__ #define __NV50_SCREEN_H__ +#define NOUVEAU_NVC0 #include "nouveau/nouveau_screen.h" +#include "nouveau/nouveau_fence.h" +#include "nouveau/nouveau_mm.h" +#undef NOUVEAU_NVC0 +#include "nv50_winsys.h" +#include "nv50_stateobj.h" + +#define NV50_TIC_MAX_ENTRIES 2048 +#define NV50_TSC_MAX_ENTRIES 2048 struct 
nv50_context; -struct nv50_screen { - struct nouveau_screen base; +#define NV50_CODE_BO_SIZE_LOG2 19 - struct nouveau_winsys *nvws; +#define NV50_SCRATCH_SIZE (2 << 20) +#define NV50_SCRATCH_NR_BUFFERS 2 - struct nv50_context *cur_ctx; +struct nv50_screen { + struct nouveau_screen base; + struct nouveau_winsys *nvws; + + struct nv50_context *cur_ctx; + + struct nouveau_bo *code; + struct nouveau_bo *uniforms; + struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ + struct nouveau_bo *stack_bo; + struct nouveau_bo *tls_bo; + + uint64_t tls_size; + + struct nouveau_resource *vp_code_heap; + struct nouveau_resource *gp_code_heap; + struct nouveau_resource *fp_code_heap; + + struct { + void **entries; + int next; + uint32_t lock[NV50_TIC_MAX_ENTRIES / 32]; + } tic; + + struct { + void **entries; + int next; + uint32_t lock[NV50_TSC_MAX_ENTRIES / 32]; + } tsc; + + struct { + uint32_t *map; + struct nouveau_bo *bo; + } fence; + + struct nouveau_notifier *sync; + + struct nouveau_mman *mm_VRAM_fe0; + + struct nouveau_grobj *tesla; + struct nouveau_grobj *eng2d; + struct nouveau_grobj *m2mf; +}; - struct nouveau_grobj *tesla; - struct nouveau_grobj *eng2d; - struct nouveau_grobj *m2mf; - struct nouveau_notifier *sync; +static INLINE struct nv50_screen * +nv50_screen(struct pipe_screen *screen) +{ + return (struct nv50_screen *)screen; +} - struct nouveau_bo *constbuf_misc[1]; - struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES]; +void nv50_screen_make_buffers_resident(struct nv50_screen *); - struct nouveau_resource *immd_heap; +int nv50_screen_tic_alloc(struct nv50_screen *, void *); +int nv50_screen_tsc_alloc(struct nv50_screen *, void *); - struct nouveau_bo *tic; - struct nouveau_bo *tsc; +static INLINE void +nv50_resource_fence(struct nv04_resource *res, uint32_t flags) +{ + struct nv50_screen *screen = nv50_screen(res->base.screen); - struct nouveau_bo *stack_bo; /* control flow stack */ - struct nouveau_bo *local_bo; /* l[] memory */ + if (res->mm) { + 
nouveau_fence_ref(screen->base.fence.current, &res->fence); - boolean force_push; -}; + if (flags & NOUVEAU_BO_WR) + nouveau_fence_ref(screen->base.fence.current, &res->fence_wr); + } +} -static INLINE struct nv50_screen * -nv50_screen(struct pipe_screen *screen) +static INLINE void +nv50_resource_validate(struct nv04_resource *res, uint32_t flags) { - return (struct nv50_screen *)screen; -} + struct nv50_screen *screen = nv50_screen(res->base.screen); -extern void nv50_screen_relocs(struct nv50_screen *); + if (likely(res->bo)) { + nouveau_bo_validate(screen->base.channel, res->bo, flags); -extern void nv50_screen_reloc_constbuf(struct nv50_screen *, unsigned cbi); + if (flags & NOUVEAU_BO_WR) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + if (flags & NOUVEAU_BO_RD) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nv50_resource_fence(res, flags); + } +} struct nv50_format { - uint32_t rt; - uint32_t tic; - uint32_t vtx; - uint32_t usage; + uint32_t rt; + uint32_t tic; + uint32_t vtx; + uint32_t usage; }; extern const struct nv50_format nv50_format_table[]; +static INLINE void +nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); +} + +static INLINE void +nv50_screen_tsc_unlock(struct nv50_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); +} + +static INLINE void +nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) { + screen->tic.entries[tic->id] = NULL; + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); + } +} + +static INLINE void +nv50_screen_tsc_free(struct nv50_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) { + screen->tsc.entries[tsc->id] = NULL; + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); + } +} + #endif diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c 
b/src/gallium/drivers/nv50/nv50_shader_state.c index 306aa81d98..bea9c095bb 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -28,422 +28,266 @@ #include "nv50_context.h" -static void -nv50_transfer_constbuf(struct nv50_context *nv50, - struct pipe_resource *buf, unsigned size, unsigned cbi) +void +nv50_constbufs_validate(struct nv50_context *nv50) { - struct pipe_context *pipe = &nv50->pipe; - struct pipe_transfer *transfer; struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - uint32_t *map; - unsigned count, start; + unsigned s; - if (buf == NULL) - return; + for (s = 0; s < 3; ++s) { + struct nv04_resource *res; + int i; + unsigned p, b; - map = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &transfer); - if (!map) - return; + if (s == PIPE_SHADER_FRAGMENT) + p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT; + else + if (s == PIPE_SHADER_GEOMETRY) + p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY; + else + p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX; + + while (nv50->constbuf_dirty[s]) { + struct nouveau_bo *bo; + unsigned start = 0; + unsigned words = 0; + + i = ffs(nv50->constbuf_dirty[s]) - 1; + nv50->constbuf_dirty[s] &= ~(1 << i); + + res = nv04_resource(nv50->constbuf[s][i]); + if (!res) { + if (i != 0) { + BEGIN_RING(chan, RING_3D(SET_PROGRAM_CB), 1); + OUT_RING (chan, (i << 8) | p | 0); + } + continue; + } - count = (buf->width0 + 3) / 4; - start = 0; + if (i == 0) { + b = NV50_CB_PVP + s; - while (count) { - unsigned nr = AVAIL_RING(chan); + /* always upload GL uniforms through CB DATA */ + bo = nv50->screen->uniforms; + words = res->base.width0 / 4; + } else { + b = s * 16 + i; - if (nr < 8) { - FIRE_RING(chan); - continue; - } - nr = MIN2(count, nr - 7); - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + assert(0); - nv50_screen_reloc_constbuf(nv50->screen, cbi); + if (!nouveau_resource_mapped_by_gpu(&res->base)) { + 
nouveau_buffer_migrate(&nv50->base, res, NOUVEAU_BO_VRAM); - BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); - OUT_RING (chan, (start << 8) | cbi); - BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr); - OUT_RINGp (chan, map, nr); + BEGIN_RING(chan, RING_3D(CODE_CB_FLUSH), 1); + OUT_RING (chan, 0); + } + MARK_RING (chan, 6, 2); + BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3); + OUT_RESRCh(chan, res, 0, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, 0, NOUVEAU_BO_RD); + OUT_RING (chan, (b << 16) | (res->base.width0 & 0xffff)); + BEGIN_RING(chan, RING_3D(SET_PROGRAM_CB), 1); + OUT_RING (chan, (b << 12) | (i << 8) | p | 1); - count -= nr; - start += nr; - map += nr; - } - - pipe_buffer_unmap(pipe, buf, transfer); -} + bo = res->bo; -static void -nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) -{ - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - unsigned cbi; - - if (p->immd_size) { - uint32_t *data = p->immd; - unsigned count = p->immd_size / 4; - unsigned start = 0; + nv50_bufctx_add_resident(nv50, NV50_BUFCTX_CONSTANT, res, + res->domain | NOUVEAU_BO_RD); + } - while (count) { - unsigned nr = AVAIL_RING(chan); + if (words) { + MARK_RING(chan, 8, 1); - if (nr < 8) { - FIRE_RING(chan); - continue; + nouveau_bo_validate(chan, bo, res->domain | NOUVEAU_BO_WR); } - nr = MIN2(count, nr - 7); - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); - nv50_screen_reloc_constbuf(nv50->screen, NV50_CB_PMISC); + while (words) { + unsigned nr = AVAIL_RING(chan); - BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); - OUT_RING (chan, (start << 8) | NV50_CB_PMISC); - BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr); - OUT_RINGp (chan, data, nr); + if (nr < 16) { + FIRE_RING(chan); + nouveau_bo_validate(chan, bo, res->domain | NOUVEAU_BO_WR); + continue; + } + nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN); - count -= nr; - start += nr; - data += nr; - } - } + BEGIN_RING(chan, RING_3D(CB_ADDR), 1); 
+ OUT_RING (chan, (start << 8) | b); + BEGIN_RING_NI(chan, RING_3D(CB_DATA(0)), nr); + OUT_RINGp (chan, &res->data[start * 4], nr); - /* If the state tracker doesn't change the constbuf, and it is first - * validated with a program that doesn't use it, this check prevents - * it from even being uploaded. */ - /* - if (p->parm_size == 0) - return; - */ - - switch (p->type) { - case PIPE_SHADER_VERTEX: - cbi = NV50_CB_PVP; - break; - case PIPE_SHADER_FRAGMENT: - cbi = NV50_CB_PFP; - break; - case PIPE_SHADER_GEOMETRY: - cbi = NV50_CB_PGP; - break; - default: - assert(0); - return; + start += nr; + words -= nr; + } + } } - - nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi); } -static void -nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) +static boolean +nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog) { - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_grobj *eng2d = nv50->screen->eng2d; + struct nouveau_resource *heap; int ret; - unsigned offset; - unsigned size = p->code_size; - uint32_t *data = p->code; + unsigned size; - assert(p->translated); + if (prog->translated) + return TRUE; - /* TODO: use a single bo (for each type) for shader code */ - if (p->bo) - return; - ret = nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, size, &p->bo); - assert(!ret); - - offset = p->code_start = 0; - - BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2); - OUT_RING (chan, NV50_2D_DST_FORMAT_R8_UNORM); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1); - OUT_RING (chan, 0x40000); - BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 2); - OUT_RING (chan, 0x10000); - OUT_RING (chan, 1); - - while (size) { - unsigned nr = size / 4; - - if (AVAIL_RING(chan) < 32) - FIRE_RING(chan); - - nr = MIN2(nr, AVAIL_RING(chan) - 18); - nr = MIN2(nr, 1792); - if (nr < (size / 4)) - nr &= ~0x3f; - assert(!(size & 3)); - - 
BEGIN_RING(chan, eng2d, NV50_2D_DST_ADDRESS_HIGH, 2); - OUT_RELOCh(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); - OUT_RING (chan, 0); - OUT_RING (chan, NV50_2D_SIFC_FORMAT_R8_UNORM); - BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); - OUT_RING (chan, nr * 4); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - - BEGIN_RING_NI(chan, eng2d, NV50_2D_SIFC_DATA, nr); - OUT_RINGp (chan, data, nr); - - data += nr; - offset += nr * 4; - size -= nr * 4; - } + prog->translated = nv50_program_translate(prog); + if (!prog->translated) + return FALSE; - BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); - OUT_RING (chan, 0); -} + if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap; + else + if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap; + else + heap = nv50->screen->vp_code_heap; -static void -nv50_vp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p) -{ - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(5, 7, 2); - - nv50_program_validate_code(nv50, p); - - so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); - so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); - so_data (so, p->vp.attrs[0]); - so_data (so, p->vp.attrs[1]); - so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); - so_data (so, p->max_out); - so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1); - so_data (so, p->max_gpr); - so_method(so, tesla, NV50TCL_VP_START_ID, 1); - so_data (so, p->code_start); - - so_ref(so, &p->so); - so_ref(NULL, &so); -} + size = 
align(prog->code_size, 0x100); -static void -nv50_fp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p) -{ - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(6, 7, 2); - - nv50_program_validate_code(nv50, p); - - so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); - so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1); - so_data (so, p->max_gpr); - so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); - so_data (so, p->max_out); - so_method(so, tesla, NV50TCL_FP_CONTROL, 1); - so_data (so, p->fp.flags[0]); - so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); - so_data (so, p->fp.flags[1]); - so_method(so, tesla, NV50TCL_FP_START_ID, 1); - so_data (so, p->code_start); - - so_ref(so, &p->so); - so_ref(NULL, &so); -} + ret = nouveau_resource_alloc(heap, size, prog, &prog->res); + if (ret) { + NOUVEAU_ERR("out of code space for shader type %i\n", prog->type); + return FALSE; + } + prog->code_base = prog->res->start; -static void -nv50_gp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p) -{ - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(6, 7, 2); - - nv50_program_validate_code(nv50, p); - - so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2); - so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1); - so_data (so, p->max_gpr); - so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1); - so_data (so, p->max_out); - so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1); - so_data (so, p->gp.prim_type); - so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1); - so_data (so, p->gp.vert_count); - so_method(so, tesla, 
NV50TCL_GP_START_ID, 1); - so_data (so, p->code_start); - - so_ref(so, &p->so); - so_ref(NULL, &so); -} + nv50_relocate_program(prog, prog->code_base, 0); -static boolean -nv50_program_validate(struct nv50_program *p) -{ - p->translated = nv50_program_tx(p); - assert(p->translated); - return p->translated; -} + nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, + (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, + NOUVEAU_BO_VRAM, prog->code_size, prog->code); -static INLINE void -nv50_program_validate_common(struct nv50_context *nv50, struct nv50_program *p) -{ - nv50_program_validate_code(nv50, p); + BEGIN_RING(nv50->screen->base.channel, RING_3D(CODE_CB_FLUSH), 1); + OUT_RING (nv50->screen->base.channel, 0); - if (p->uses_lmem) - nv50->req_lmem |= 1 << p->type; - else - nv50->req_lmem &= ~(1 << p->type); + return TRUE; } -struct nouveau_stateobj * +void nv50_vertprog_validate(struct nv50_context *nv50) { - struct nv50_program *p = nv50->vertprog; - struct nouveau_stateobj *so = NULL; - - if (!p->translated) { - if (nv50_program_validate(p)) - nv50_vp_update_stateobj(nv50, p); - else - return NULL; - } - - if (nv50->dirty & NV50_NEW_VERTPROG_CB) - nv50_program_validate_data(nv50, p); - - if (!(nv50->dirty & NV50_NEW_VERTPROG)) - return NULL; - - nv50_program_validate_common(nv50, p); + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nv50_program *vp = nv50->vertprog; - so_ref(p->so, &so); - return so; + if (!nv50_program_validate(nv50, vp)) + return; + + BEGIN_RING(chan, RING_3D(VP_ATTR_EN(0)), 2); + OUT_RING (chan, vp->vp.attrs[0]); + OUT_RING (chan, vp->vp.attrs[1]); + BEGIN_RING(chan, RING_3D(VP_REG_ALLOC_RESULT), 1); + OUT_RING (chan, vp->max_out); + BEGIN_RING(chan, RING_3D(VP_REG_ALLOC_TEMP), 1); + OUT_RING (chan, vp->max_gpr); + BEGIN_RING(chan, RING_3D(VP_START_ID), 1); + OUT_RING (chan, vp->code_base); } -struct nouveau_stateobj * +void nv50_fragprog_validate(struct nv50_context *nv50) { - struct nv50_program *p = 
nv50->fragprog; - struct nouveau_stateobj *so = NULL; - - if (!p->translated) { - if (nv50_program_validate(p)) - nv50_fp_update_stateobj(nv50, p); - else - return NULL; - } - - if (nv50->dirty & NV50_NEW_FRAGPROG_CB) - nv50_program_validate_data(nv50, p); - - if (!(nv50->dirty & NV50_NEW_FRAGPROG)) - return NULL; - - nv50_program_validate_common(nv50, p); + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nv50_program *fp = nv50->fragprog; - so_ref(p->so, &so); - return so; + if (!nv50_program_validate(nv50, fp)) + return; + + BEGIN_RING(chan, RING_3D(FP_REG_ALLOC_TEMP), 1); + OUT_RING (chan, fp->max_gpr); + BEGIN_RING(chan, RING_3D(FP_RESULT_COUNT), 1); + OUT_RING (chan, fp->max_out); + BEGIN_RING(chan, RING_3D(FP_CONTROL), 1); + OUT_RING (chan, fp->fp.flags[0]); + BEGIN_RING(chan, RING_3D(FP_CTRL_UNK196C), 1); + OUT_RING (chan, fp->fp.flags[1]); + BEGIN_RING(chan, RING_3D(FP_START_ID), 1); + OUT_RING (chan, fp->code_base); } -struct nouveau_stateobj * -nv50_geomprog_validate(struct nv50_context *nv50) +void +nv50_gmtyprog_validate(struct nv50_context *nv50) { - struct nv50_program *p = nv50->geomprog; - struct nouveau_stateobj *so = NULL; - - /* GP may be NULL, but VP and FP may not */ - if (!p) - return NULL; /* GP is deactivated in linkage validation */ - - if (!p->translated) { - if (nv50_program_validate(p)) - nv50_gp_update_stateobj(nv50, p); - else - return NULL; - } - - if (nv50->dirty & NV50_NEW_GEOMPROG_CB) - nv50_program_validate_data(nv50, p); - - if (!(nv50->dirty & NV50_NEW_GEOMPROG)) - return NULL; - - nv50_program_validate_common(nv50, p); - - so_ref(p->so, &so); - return so; + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nv50_program *gp = nv50->vertprog; + + if (!nv50_program_validate(nv50, gp)) + return; + + BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_TEMP), 1); + OUT_RING (chan, gp->max_gpr); + BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_RESULT), 1); + OUT_RING (chan, gp->max_out); + BEGIN_RING(chan, 
RING_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1); + OUT_RING (chan, gp->gp.prim_type); + BEGIN_RING(chan, RING_3D(GP_VERTEX_OUTPUT_COUNT), 1); + OUT_RING (chan, gp->gp.vert_count); + BEGIN_RING(chan, RING_3D(GP_START_ID), 1); + OUT_RING (chan, gp->code_base); } -/* XXX: this might not work correctly in all cases yet: we assume that - * an FP generic input that is not written in the VP is gl_PointCoord. - */ -static uint32_t -nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned m) +void +nv50_sprite_coords_validate(struct nv50_context *nv50) { - struct nv50_program *vp = nv50->vertprog; + struct nouveau_channel *chan = nv50->screen->base.channel; + uint32_t pntc[8], mode; struct nv50_program *fp = nv50->fragprog; unsigned i, c; + unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff; + + if (!nv50->rast->pipe.point_quad_rasterization) { + if (nv50->state.point_sprite) { + BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE_MAP(0)), 8); + for (i = 0; i < 8; ++i) + OUT_RING(chan, 0); - memset(pntc, 0, 8 * sizeof(uint32_t)); + nv50->state.point_sprite = FALSE; + } + return; + } else { + nv50->state.point_sprite = TRUE; + } - if (nv50->geomprog) - vp = nv50->geomprog; + memset(pntc, 0, sizeof(pntc)); for (i = 0; i < fp->in_nr; i++) { - unsigned j, n = util_bitcount(fp->in[i].mask); + unsigned n = util_bitcount(fp->in[i].mask); if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) { m += n; continue; } - - for (j = 0; j < vp->out_nr; ++j) - if (vp->out[j].sn == fp->in[i].sn && vp->out[j].si == fp->in[i].si) - break; - - if (j < vp->out_nr) { - uint32_t en = nv50->rasterizer->pipe.sprite_coord_enable; - - if (!(en & (1 << vp->out[j].si))) { - m += n; - continue; - } + if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) { + m += n; + continue; } - /* this is either PointCoord or replaced by sprite coords */ - for (c = 0; c < 4; c++) { - if (!(fp->in[i].mask & (1 << c))) - continue; - pntc[m / 8] |= (c + 1) << ((m % 8) * 4); - ++m; + for (c = 0; c < 4; ++c) { + if 
(fp->in[i].mask & (1 << c)) { + pntc[m / 8] |= (c + 1) << ((m % 8) * 4); + ++m; + } } } - if (nv50->rasterizer->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) - return 0; - return (1 << 4); + + if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) + mode = 0x00; + else + mode = 0x10; + + BEGIN_RING(chan, RING_3D(POINT_SPRITE_CTRL), 1); + OUT_RING (chan, mode); + + BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE_MAP(0)), 8); + OUT_RINGp (chan, pntc, 8); } static int -nv50_vec4_map(uint32_t *map32, int mid, uint32_t lin[4], +nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4], struct nv50_varying *in, struct nv50_varying *out) { int c; uint8_t mv = out->mask, mf = in->mask, oid = out->hw; - uint8_t *map = (uint8_t *)map32; for (c = 0; c < 4; ++c) { if (mf & 1) { @@ -465,140 +309,112 @@ nv50_vec4_map(uint32_t *map32, int mid, uint32_t lin[4], return mid; } -struct nouveau_stateobj * +void nv50_fp_linkage_validate(struct nv50_context *nv50) { - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nv50_program *vp; + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog; struct nv50_program *fp = nv50->fragprog; - struct nouveau_stateobj *so; struct nv50_varying dummy; int i, n, c, m; - - uint32_t map[16], lin[4], pntc[8]; - + uint32_t primid = 0; + uint32_t psiz = 0x000; uint32_t interp = fp->fp.interp; uint32_t colors = fp->fp.colors; - uint32_t clip = 0x04; - uint32_t psiz = 0x000; - uint32_t primid = 0; - uint32_t sysval = 0; + uint32_t lin[4]; + uint8_t map[64]; - if (nv50->geomprog) { - vp = nv50->geomprog; - memset(map, 0x80, sizeof(map)); - } else { - vp = nv50->vertprog; - memset(map, 0x40, sizeof(map)); - } - memset(lin, 0, sizeof(lin)); + memset(lin, 0x00, sizeof(lin)); + + /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx + * or is it the first byte ? + */ + memset(map, nv50->gmtyprog ? 
0x80 : 0x40, sizeof(map)); - dummy.linear = 0; dummy.mask = 0xf; /* map all components of HPOS */ + dummy.linear = 0; m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]); - if (vp->vp.clpd < 0x40) { - for (c = 0; c < vp->vp.clpd_nr; ++c) { - map[m / 4] |= (vp->vp.clpd + c) << ((m % 4) * 8); - ++m; - } - clip |= vp->vp.clpd_nr << 8; - } + for (c = 0; c < vp->vp.clpd_nr; ++c) + map[m++] |= vp->vp.clpd + c; colors |= m << 8; /* adjust BFC0 id */ - /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ - if (nv50->rasterizer->pipe.light_twoside) { + /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */ + if (nv50->rast->pipe.light_twoside) { for (i = 0; i < 2; ++i) m = nv50_vec4_map(map, m, lin, - &fp->in[fp->vp.bfc[i]], - &vp->out[vp->vp.bfc[i]]); + &fp->in[fp->vp.bfc[i]], &vp->out[vp->vp.bfc[i]]); } - colors += m - 4; /* adjust FFC0 id */ - interp |= m << 8; /* set mid where 'normal' FP inputs start */ + interp |= m << 8; /* set map id where 'normal' FP inputs start */ dummy.mask = 0x0; - for (i = 0; i < fp->in_nr; i++) { + for (i = 0; i < fp->in_nr; ++i) { for (n = 0; n < vp->out_nr; ++n) if (vp->out[n].sn == fp->in[i].sn && vp->out[n].si == fp->in[i].si) break; - m = nv50_vec4_map(map, m, lin, &fp->in[i], (n < vp->out_nr) ? 
&vp->out[n] : &dummy); - } + } /* PrimitiveID either is replaced by the system value, or * written by the geometry shader into an output register */ if (fp->gp.primid < 0x40) { - i = (m % 4) * 8; - map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->gp.primid << i); - primid = m++; + primid = m; + map[m++] = vp->gp.primid; } - if (nv50->rasterizer->pipe.point_size_per_vertex) { - i = (m % 4) * 8; - map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->vp.psiz << i); - psiz = (m++ << 4) | 1; + if (nv50->rast->pipe.point_size_per_vertex) { + psiz = (m << 4) | 1; + map[m++] = vp->vp.psiz; } - /* now fill the stateobj (at most 28 so_data) */ - so = so_new(10, 54, 0); - n = (m + 3) / 4; assert(m <= 64); - if (vp->type == PIPE_SHADER_GEOMETRY) { - so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1); - so_data (so, m); - so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n); - so_datap (so, map, n); + + if (unlikely(nv50->gmtyprog)) { + BEGIN_RING(chan, RING_3D(GP_RESULT_MAP_SIZE), 1); + OUT_RING (chan, m); + BEGIN_RING(chan, RING_3D(GP_RESULT_MAP(0)), n); + OUT_RINGp (chan, map, n); } else { - so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); - so_data (so, vp->vp.attrs[2]); + BEGIN_RING(chan, RING_3D(VP_GP_BUILTIN_ATTR_EN), 1); + OUT_RING (chan, vp->vp.attrs[2]); - so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1); - so_data (so, primid); + BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_4), 1); + OUT_RING (chan, primid); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); - so_data (so, m); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); - so_datap (so, map, n); + BEGIN_RING(chan, RING_3D(VP_RESULT_MAP_SIZE), 1); + OUT_RING (chan, m); + BEGIN_RING(chan, RING_3D(VP_RESULT_MAP(0)), n); + OUT_RINGp (chan, map, n); } - so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); - so_data (so, colors); - so_data (so, clip); - so_data (so, sysval); - so_data (so, psiz); - - so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); - so_data (so, interp); - - so_method(so, tesla, 
NV50TCL_NOPERSPECTIVE_BITMAP(0), 4); - so_datap (so, lin, 4); + BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_0), 4); + OUT_RING (chan, colors); + OUT_RING (chan, (vp->vp.clpd_nr << 8) | 4); + OUT_RING (chan, 0); + OUT_RING (chan, psiz); - if (nv50->rasterizer->pipe.point_quad_rasterization) { - so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1); - so_data (so, - nv50_pntc_replace(nv50, pntc, (interp >> 8) & 0xff)); + BEGIN_RING(chan, RING_3D(FP_INTERPOLANT_CTRL), 1); + OUT_RING (chan, interp); - so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); - so_datap (so, pntc, 8); - } + nv50->state.interpolant_ctrl = interp; - so_method(so, tesla, NV50TCL_GP_ENABLE, 1); - so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0); + BEGIN_RING(chan, RING_3D(NOPERSPECTIVE_BITMAP(0)), 4); + OUT_RINGp (chan, lin, 4); - return so; + BEGIN_RING(chan, RING_3D(GP_ENABLE), 1); + OUT_RING (chan, nv50->gmtyprog ? 1 : 0); } static int -nv50_vp_gp_mapping(uint32_t *map32, int m, +nv50_vp_gp_mapping(uint8_t *map, int m, struct nv50_program *vp, struct nv50_program *gp) { - uint8_t *map = (uint8_t *)map32; int i, j, c; for (i = 0; i < gp->in_nr; ++i) { @@ -625,34 +441,29 @@ nv50_vp_gp_mapping(uint32_t *map32, int m, return m; } -struct nouveau_stateobj * +void nv50_gp_linkage_validate(struct nv50_context *nv50) { - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so; + struct nouveau_channel *chan = nv50->screen->base.channel; struct nv50_program *vp = nv50->vertprog; - struct nv50_program *gp = nv50->geomprog; - uint32_t map[16]; + struct nv50_program *gp = nv50->gmtyprog; int m = 0; + int n; + uint8_t map[64]; if (!gp) - return NULL; + return; memset(map, 0, sizeof(map)); m = nv50_vp_gp_mapping(map, m, vp, gp); - so = so_new(3, 24 - 3, 0); - - so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); - so_data (so, vp->vp.attrs[2] | gp->vp.attrs[2]); - - assert(m <= 32); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); - so_data (so, m); + n = (m + 3) / 
4; - m = (m + 3) / 4; - so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m); - so_datap (so, map, m); + BEGIN_RING(chan, RING_3D(VP_GP_BUILTIN_ATTR_EN), 1); + OUT_RING (chan, vp->vp.attrs[2] | gp->vp.attrs[2]); - return so; + BEGIN_RING(chan, RING_3D(VP_RESULT_MAP_SIZE), 1); + OUT_RING (chan, m); + BEGIN_RING(chan, RING_3D(VP_RESULT_MAP(0)), n); + OUT_RINGp (chan, map, n); } diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index f42fa2d4d2..db25715969 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Ben Skeggs + * Copyright 2010 Christoph Bumiller * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,871 +20,856 @@ * SOFTWARE. */ -#include "pipe/p_state.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" +#include "nv50_stateobj.h" #include "nv50_context.h" -#include "nv50_texture.h" -#include "nouveau/nouveau_stateobj.h" +#include "nv50_3d.xml.h" +#include "nv50_texture.xml.h" + +#include "nouveau/nouveau_gldefs.h" static INLINE uint32_t nv50_colormask(unsigned mask) { - uint32_t cmask = 0; + uint32_t ret = 0; - if (mask & PIPE_MASK_R) - cmask |= 0x0001; - if (mask & PIPE_MASK_G) - cmask |= 0x0010; - if (mask & PIPE_MASK_B) - cmask |= 0x0100; - if (mask & PIPE_MASK_A) - cmask |= 0x1000; + if (mask & PIPE_MASK_R) + ret |= 0x0001; + if (mask & PIPE_MASK_G) + ret |= 0x0010; + if (mask & PIPE_MASK_B) + ret |= 0x0100; + if (mask & PIPE_MASK_A) + ret |= 0x1000; - return cmask; + return ret; } +#define NV50_BLEND_FACTOR_CASE(a, b) \ + case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b + static INLINE uint32_t -nv50_blend_func(unsigned factor) -{ - switch (factor) { - case PIPE_BLENDFACTOR_ZERO: - return NV50TCL_BLEND_FUNC_SRC_RGB_ZERO; - case PIPE_BLENDFACTOR_ONE: - return 
NV50TCL_BLEND_FUNC_SRC_RGB_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: - return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: - return NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA; - case PIPE_BLENDFACTOR_SRC1_COLOR: - return NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - return NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA; - default: - return NV50TCL_BLEND_FUNC_SRC_RGB_ZERO; - } +nv50_blend_fac(unsigned factor) +{ + switch (factor) { + NV50_BLEND_FACTOR_CASE(ONE, ONE); + NV50_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR); + NV50_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA); + NV50_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA); + NV50_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR); + NV50_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE); 
+ NV50_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR); + NV50_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA); + NV50_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR); + NV50_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA); + NV50_BLEND_FACTOR_CASE(ZERO, ZERO); + NV50_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR); + NV50_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA); + NV50_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA); + NV50_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR); + NV50_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR); + NV50_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA); + NV50_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR); + NV50_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA); + default: + return NV50_3D_BLEND_FACTOR_ZERO; + } } static void * nv50_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - struct nouveau_stateobj *so = so_new(5, 24, 0); - struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; - struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); - unsigned i, blend_enabled = 0; - - /*XXX ignored: - * - dither - */ - - so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); - if (cso->independent_blend_enable) { - for (i = 0; i < 8; ++i) { - so_data(so, cso->rt[i].blend_enable); - if (cso->rt[i].blend_enable) - blend_enabled = 1; - } - } else - if (cso->rt[0].blend_enable) { - blend_enabled = 1; - for (i = 0; i < 8; i++) - so_data(so, 1); - } else { - for (i = 0; i < 8; i++) - so_data(so, 0); - } - if (blend_enabled) { - so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5); - so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); - so_data (so, nv50_blend_func(cso->rt[0].rgb_src_factor)); - so_data (so, nv50_blend_func(cso->rt[0].rgb_dst_factor)); - so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func)); - so_data (so, nv50_blend_func(cso->rt[0].alpha_src_factor)); - so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1); - so_data 
(so, nv50_blend_func(cso->rt[0].alpha_dst_factor)); - } - - if (cso->logicop_enable == 0 ) { - so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 1); - so_data (so, 0); - } else { - so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 2); - so_data (so, 1); - so_data (so, nvgl_logicop_func(cso->logicop_func)); - } - - so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8); - if (cso->independent_blend_enable) - for (i = 0; i < 8; ++i) - so_data(so, nv50_colormask(cso->rt[i].colormask)); - else { - uint32_t cmask = nv50_colormask(cso->rt[0].colormask); - for (i = 0; i < 8; i++) - so_data(so, cmask); - } - - bso->pipe = *cso; - so_ref(so, &bso->so); - so_ref(NULL, &so); - return (void *)bso; + const struct pipe_blend_state *cso) +{ + struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj); + int i; + boolean emit_common_func = cso->rt[0].blend_enable; + + if (nv50_context(pipe)->screen->tesla->grclass >= NVA3_3D) { + SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1); + SB_DATA (so, cso->independent_blend_enable); + } + + so->pipe = *cso; + + SB_BEGIN_3D(so, BLEND_ENABLE(0), 8); + if (cso->independent_blend_enable) { + for (i = 0; i < 8; ++i) { + SB_DATA(so, cso->rt[i].blend_enable); + if (cso->rt[i].blend_enable) + emit_common_func = TRUE; + } + + if (nv50_context(pipe)->screen->tesla->grclass >= NVA3_3D) { + emit_common_func = FALSE; + + for (i = 0; i < 8; ++i) { + if (!cso->rt[i].blend_enable) + continue; + SB_BEGIN_3D_(so, NVA3_3D_IBLEND_EQUATION_RGB(i), 6); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func)); + SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_src_factor)); + SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func)); + SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_src_factor)); + SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_dst_factor)); + } + } + } else { + for (i = 0; i < 8; ++i) + SB_DATA(so, cso->rt[0].blend_enable); + } + + if (emit_common_func) { + SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5); + SB_DATA (so, 
nvgl_blend_eqn(cso->rt[0].rgb_func)); + SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_src_factor)); + SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func)); + SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_src_factor)); + SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1); + SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_dst_factor)); + } + + if (cso->logicop_enable) { + SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2); + SB_DATA (so, 1); + SB_DATA (so, nvgl_logicop_func(cso->logicop_func)); + } else { + SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 1); + SB_DATA (so, 0); + } + + SB_BEGIN_3D(so, COLOR_MASK(0), 8); + if (cso->independent_blend_enable) { + for (i = 0; i < 8; ++i) + SB_DATA(so, nv50_colormask(cso->rt[i].colormask)); + } else { + uint32_t cmask = nv50_colormask(cso->rt[0].colormask); + for (i = 0; i < 8; ++i) + SB_DATA(so, cmask); + } + + assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + return so; } static void nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - nv50->blend = hwcso; - nv50->dirty |= NV50_NEW_BLEND; + nv50->blend = hwcso; + nv50->dirty |= NV50_NEW_BLEND; } static void nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nv50_blend_stateobj *bso = hwcso; - - so_ref(NULL, &bso->so); - FREE(bso); + FREE(hwcso); } -static INLINE unsigned -wrap_mode(unsigned wrap) -{ - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - return NV50TSC_1_0_WRAPS_REPEAT; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - return NV50TSC_1_0_WRAPS_MIRROR_REPEAT; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER; - case PIPE_TEX_WRAP_CLAMP: - return NV50TSC_1_0_WRAPS_CLAMP; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - 
return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - return NV50TSC_1_0_WRAPS_MIRROR_CLAMP; - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - return NV50TSC_1_0_WRAPS_REPEAT; - } -} static void * -nv50_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nv50_sampler_stateobj *sso = CALLOC(1, sizeof(*sso)); - unsigned *tsc = sso->tsc; - float limit; - - tsc[0] = (0x00026000 | - (wrap_mode(cso->wrap_s) << 0) | - (wrap_mode(cso->wrap_t) << 3) | - (wrap_mode(cso->wrap_r) << 6)); - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - tsc[1] |= NV50TSC_1_1_MAGF_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - tsc[1] |= NV50TSC_1_1_MINF_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - tsc[1] |= NV50TSC_1_1_MINF_NEAREST; - break; - } - - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_LINEAR: - tsc[1] |= NV50TSC_1_1_MIPF_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NEAREST: - tsc[1] |= NV50TSC_1_1_MIPF_NEAREST; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - tsc[1] |= NV50TSC_1_1_MIPF_NONE; - break; - } - - if (cso->max_anisotropy >= 16) - tsc[0] |= (7 << 20); - else - if (cso->max_anisotropy >= 12) - tsc[0] |= (6 << 20); - else { - tsc[0] |= (cso->max_anisotropy >> 1) << 20; - - if (cso->max_anisotropy >= 4) - tsc[1] |= NV50TSC_1_1_UNKN_ANISO_35; - else - if (cso->max_anisotropy >= 2) - tsc[1] |= NV50TSC_1_1_UNKN_ANISO_15; - } - - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - /* XXX: must be deactivated for non-shadow textures */ - tsc[0] |= (1 << 9); - tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10; - } - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - tsc[1] |= ((int)(limit * 256.0) & 0x1fff) << 12; - - tsc[2] |= ((int)CLAMP(cso->max_lod, 0.0, 15.0) << 20) | - ((int)CLAMP(cso->min_lod, 
0.0, 15.0) << 8); - - tsc[4] = fui(cso->border_color[0]); - tsc[5] = fui(cso->border_color[1]); - tsc[6] = fui(cso->border_color[2]); - tsc[7] = fui(cso->border_color[3]); - - sso->normalized = cso->normalized_coords; - return (void *)sso; -} - -/* type == 0 for VPs, 1 for GPs, 2 for FPs, which is how the - * relevant tesla methods are indexed (NV50TCL_BIND_TSC etc.) - */ -static INLINE void -nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type, - unsigned nr, void **sampler) +nv50_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv50_rasterizer_stateobj *so; + + so = CALLOC_STRUCT(nv50_rasterizer_stateobj); + if (!so) + return NULL; + so->pipe = *cso; + +#ifndef NV50_SCISSORS_CLIPPING + SB_BEGIN_3D(so, SCISSOR_ENABLE(0), 1); + SB_DATA (so, cso->scissor); +#endif + + SB_BEGIN_3D(so, SHADE_MODEL, 1); + SB_DATA (so, cso->flatshade ? NV50_3D_SHADE_MODEL_FLAT : + NV50_3D_SHADE_MODEL_SMOOTH); + SB_BEGIN_3D(so, PROVOKING_VERTEX_LAST, 1); + SB_DATA (so, !cso->flatshade_first); + SB_BEGIN_3D(so, VERTEX_TWO_SIDE_ENABLE, 1); + SB_DATA (so, cso->light_twoside); + + SB_BEGIN_3D(so, LINE_WIDTH, 1); + SB_DATA (so, fui(cso->line_width)); + SB_BEGIN_3D(so, LINE_SMOOTH_ENABLE, 1); + SB_DATA (so, cso->line_smooth); + + SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1); + if (cso->line_stipple_enable) { + SB_DATA (so, 1); + SB_BEGIN_3D(so, LINE_STIPPLE, 1); + SB_DATA (so, (cso->line_stipple_pattern << 8) | + cso->line_stipple_factor); + } else { + SB_DATA (so, 0); + } + + if (!cso->point_size_per_vertex) { + SB_BEGIN_3D(so, POINT_SIZE, 1); + SB_DATA (so, fui(cso->point_size)); + } + SB_BEGIN_3D(so, POINT_SPRITE_ENABLE, 1); + SB_DATA (so, cso->point_quad_rasterization); + SB_BEGIN_3D(so, POINT_SMOOTH_ENABLE, 1); + SB_DATA (so, cso->point_smooth); + + SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 3); + SB_DATA (so, nvgl_polygon_mode(cso->fill_front)); + SB_DATA (so, nvgl_polygon_mode(cso->fill_back)); + SB_DATA (so, cso->poly_smooth); + 
+ SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); + SB_DATA (so, cso->cull_face != PIPE_FACE_NONE); + SB_DATA (so, cso->front_ccw ? NV50_3D_FRONT_FACE_CCW : + NV50_3D_FRONT_FACE_CW); + switch (cso->cull_face) { + case PIPE_FACE_FRONT_AND_BACK: + SB_DATA(so, NV50_3D_CULL_FACE_FRONT_AND_BACK); + break; + case PIPE_FACE_FRONT: + SB_DATA(so, NV50_3D_CULL_FACE_FRONT); + break; + case PIPE_FACE_BACK: + default: + SB_DATA(so, NV50_3D_CULL_FACE_BACK); + break; + } + + SB_BEGIN_3D(so, POLYGON_STIPPLE_ENABLE, 1); + SB_DATA (so, cso->poly_stipple_enable); + SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3); + SB_DATA (so, cso->offset_point); + SB_DATA (so, cso->offset_line); + SB_DATA (so, cso->offset_tri); + + if (cso->offset_point || cso->offset_line || cso->offset_tri) { + SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); + SB_DATA (so, fui(cso->offset_scale)); + SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); + SB_DATA (so, fui(cso->offset_units * 2.0f)); + } + + assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + +static void +nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - memcpy(nv50->sampler[type], sampler, nr * sizeof(void *)); + nv50->rast = hwcso; + nv50->dirty |= NV50_NEW_RASTERIZER; +} + +static void +nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} - nv50->sampler_nr[type] = nr; - nv50->dirty |= NV50_NEW_SAMPLER; +static void * +nv50_zsa_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv50_zsa_stateobj *so = CALLOC_STRUCT(nv50_zsa_stateobj); + + so->pipe = *cso; + + SB_BEGIN_3D(so, DEPTH_WRITE_ENABLE, 1); + SB_DATA (so, cso->depth.writemask); + SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1); + if (cso->depth.enabled) { + SB_DATA (so, 1); + SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1); + SB_DATA (so, nvgl_comparison_op(cso->depth.func)); + } else { + 
SB_DATA (so, 0); + } + + if (cso->stencil[0].enabled) { + SB_BEGIN_3D(so, STENCIL_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); + SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2); + SB_DATA (so, cso->stencil[0].writemask); + SB_DATA (so, cso->stencil[0].valuemask); + } else { + SB_BEGIN_3D(so, STENCIL_ENABLE, 1); + SB_DATA (so, 0); + } + + if (cso->stencil[1].enabled) { + assert(cso->stencil[0].enabled); + SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func)); + SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); + SB_DATA (so, cso->stencil[1].writemask); + SB_DATA (so, cso->stencil[1].valuemask); + } else { + SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 1); + SB_DATA (so, 0); + } + + SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1); + if (cso->alpha.enabled) { + SB_DATA (so, 1); + SB_BEGIN_3D(so, ALPHA_TEST_REF, 2); + SB_DATA (so, fui(cso->alpha.ref_value)); + SB_DATA (so, nvgl_comparison_op(cso->alpha.func)); + } else { + SB_DATA (so, 0); + } + + assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; } static void -nv50_vp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s) +nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso) { - nv50_sampler_state_bind(pipe, 0, nr, s); + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->zsa = hwcso; + nv50->dirty |= NV50_NEW_ZSA; } static void -nv50_fp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s) +nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso) { - nv50_sampler_state_bind(pipe, 2, nr, s); + FREE(hwcso); +} + +/* 
====================== SAMPLERS AND TEXTURES ================================ + */ + +#define NV50_TSC_WRAP_CASE(n) \ + case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n + +static INLINE unsigned +nv50_tsc_wrap_mode(unsigned wrap) +{ + switch (wrap) { + NV50_TSC_WRAP_CASE(REPEAT); + NV50_TSC_WRAP_CASE(MIRROR_REPEAT); + NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(CLAMP); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP); + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + return NV50_TSC_WRAP_REPEAT; + } +} + +void * +nv50_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv50_tsc_entry *so = CALLOC_STRUCT(nv50_tsc_entry); + float f[2]; + + so->id = -1; + + so->tsc[0] = (0x00026000 | + (nv50_tsc_wrap_mode(cso->wrap_s) << 0) | + (nv50_tsc_wrap_mode(cso->wrap_t) << 3) | + (nv50_tsc_wrap_mode(cso->wrap_r) << 6)); + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + so->tsc[1] |= NV50_TSC_1_MAGF_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + so->tsc[1] |= NV50_TSC_1_MINF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + so->tsc[1] |= NV50_TSC_1_MINF_NEAREST; + break; + } + + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_LINEAR: + so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + so->tsc[1] |= NV50_TSC_1_MIPF_NONE; + break; + } + + if (cso->max_anisotropy >= 16) + so->tsc[0] |= (7 << 20); + else + if (cso->max_anisotropy >= 12) + so->tsc[0] |= (6 << 20); + else { + so->tsc[0] |= (cso->max_anisotropy >> 1) << 20; + + if (cso->max_anisotropy >= 4) + so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35; + else + 
if (cso->max_anisotropy >= 2) + so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15; + } + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* NOTE: must be deactivated for non-shadow textures */ + so->tsc[0] |= (1 << 9); + so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10; + } + + f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f); + so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12; + + f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f); + f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f); + so->tsc[2] |= + (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff); + + so->tsc[4] = fui(cso->border_color[0]); + so->tsc[5] = fui(cso->border_color[1]); + so->tsc[6] = fui(cso->border_color[2]); + so->tsc[7] = fui(cso->border_color[3]); + + return (void *)so; } static void nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { - FREE(hwcso); + unsigned s, i; + + for (s = 0; s < 5; ++s) + for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i) + if (nv50_context(pipe)->samplers[s][i] == hwcso) + nv50_context(pipe)->samplers[s][i] = NULL; + + nv50_screen_tsc_free(nv50_context(pipe)->screen, nv50_tsc_entry(hwcso)); + + FREE(hwcso); } static INLINE void -nv50_set_sampler_views(struct pipe_context *pipe, unsigned p, - unsigned nr, - struct pipe_sampler_view **views) +nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s, + unsigned nr, void **hwcso) { - struct nv50_context *nv50 = nv50_context(pipe); - unsigned i; + unsigned i; - for (i = 0; i < nr; i++) - pipe_sampler_view_reference(&nv50->sampler_views[p][i], - views[i]); + for (i = 0; i < nr; ++i) { + struct nv50_tsc_entry *old = nv50->samplers[s][i]; - for (i = nr; i < nv50->sampler_view_nr[p]; i++) - pipe_sampler_view_reference(&nv50->sampler_views[p][i], NULL); + nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]); + if (old) + nv50_screen_tsc_unlock(nv50->screen, old); + } + for (; i < nv50->num_samplers[s]; ++i) + if (nv50->samplers[s][i]) + nv50_screen_tsc_unlock(nv50->screen, 
nv50->samplers[s][i]); - nv50->sampler_view_nr[p] = nr; - nv50->dirty |= NV50_NEW_TEXTURE; + nv50->num_samplers[s] = nr; + + nv50->dirty |= NV50_NEW_SAMPLERS; } static void -nv50_set_vp_sampler_views(struct pipe_context *pipe, - unsigned nr, - struct pipe_sampler_view **views) +nv50_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) { - nv50_set_sampler_views(pipe, 0, nr, views); + nv50_stage_sampler_states_bind(nv50_context(pipe), 0, nr, s); } static void -nv50_set_fp_sampler_views(struct pipe_context *pipe, - unsigned nr, - struct pipe_sampler_view **views) +nv50_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) { - nv50_set_sampler_views(pipe, 2, nr, views); + nv50_stage_sampler_states_bind(nv50_context(pipe), 2, nr, s); } static void -nv50_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view) +nv50_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) { - pipe_resource_reference(&view->texture, NULL); - FREE(nv50_sampler_view(view)); + nv50_stage_sampler_states_bind(nv50_context(pipe), 1, nr, s); } -static struct pipe_sampler_view * -nv50_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ) +/* NOTE: only called when not referenced anywhere, won't be bound */ +static void +nv50_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) { - struct nv50_sampler_view *view = CALLOC_STRUCT(nv50_sampler_view); + pipe_resource_reference(&view->texture, NULL); - view->pipe = *templ; - view->pipe.reference.count = 1; - view->pipe.texture = NULL; - pipe_resource_reference(&view->pipe.texture, texture); - view->pipe.context = pipe; + nv50_screen_tic_free(nv50_context(pipe)->screen, nv50_tic_entry(view)); - if (!nv50_tex_construct(view)) { - nv50_sampler_view_destroy(pipe, &view->pipe); - return NULL; - } - return &view->pipe; + FREE(nv50_tic_entry(view)); } +static INLINE void 
+nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, + unsigned nr, + struct pipe_sampler_view **views) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]); + if (old) + nv50_screen_tic_unlock(nv50->screen, old); -static void * -nv50_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - struct nouveau_stateobj *so = so_new(16, 22, 0); - struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; - struct nv50_rasterizer_stateobj *rso = - CALLOC_STRUCT(nv50_rasterizer_stateobj); - - /*XXX: ignored - * - light_twoside - * - point_smooth - * - multisample - * - point_sprite / sprite_coord_mode - */ - - so_method(so, tesla, NV50TCL_SCISSOR_ENABLE(0), 1); - so_data (so, cso->scissor); - - so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); - so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : - NV50TCL_SHADE_MODEL_SMOOTH); - so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1); - so_data (so, cso->flatshade_first ? 0 : 1); - - so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); - so_data (so, cso->light_twoside); - - so_method(so, tesla, NV50TCL_LINE_WIDTH, 1); - so_data (so, fui(cso->line_width)); - so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1); - so_data (so, cso->line_smooth ? 1 : 0); - if (cso->line_stipple_enable) { - so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); - so_data (so, 1); - so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1); - so_data (so, (cso->line_stipple_pattern << 8) | - cso->line_stipple_factor); - } else { - so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, tesla, NV50TCL_POINT_SIZE, 1); - so_data (so, fui(cso->point_size)); - - so_method(so, tesla, NV50TCL_POINT_SPRITE_ENABLE, 1); - so_data (so, cso->point_quad_rasterization ? 
1 : 0); - - so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3); - so_data(so, nvgl_polygon_mode(cso->fill_front)); - so_data(so, nvgl_polygon_mode(cso->fill_back)); - so_data(so, cso->poly_smooth ? 1 : 0); - - so_method(so, tesla, NV50TCL_CULL_FACE_ENABLE, 3); - so_data (so, cso->cull_face != PIPE_FACE_NONE); - if (cso->front_ccw) { - so_data(so, NV50TCL_FRONT_FACE_CCW); - } - else { - so_data(so, NV50TCL_FRONT_FACE_CW); - } - switch (cso->cull_face) { - case PIPE_FACE_FRONT: - so_data(so, NV50TCL_CULL_FACE_FRONT); - break; - case PIPE_FACE_BACK: - so_data(so, NV50TCL_CULL_FACE_BACK); - break; - case PIPE_FACE_FRONT_AND_BACK: - so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV50TCL_CULL_FACE_BACK); - break; - } - - so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, cso->poly_stipple_enable ? 1 : 0); - - so_method(so, tesla, NV50TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - so_data(so, cso->offset_point); - so_data(so, cso->offset_line); - so_data(so, cso->offset_tri); - - if (cso->offset_point || - cso->offset_line || - cso->offset_tri) { - so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); - so_data (so, fui(cso->offset_scale)); - so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); - so_data (so, fui(cso->offset_units * 2.0f)); - } - - rso->pipe = *cso; - so_ref(so, &rso->so); - so_ref(NULL, &so); - return (void *)rso; + pipe_sampler_view_reference(&nv50->textures[s][i], views[i]); + } + + for (i = nr; i < nv50->num_textures[s]; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]); + if (!old) + continue; + nv50_screen_tic_unlock(nv50->screen, old); + + pipe_sampler_view_reference(&nv50->textures[s][i], NULL); + } + + nv50->num_textures[s] = nr; + + nv50_bufctx_reset(nv50, NV50_BUFCTX_TEXTURES); + + nv50->dirty |= NV50_NEW_TEXTURES; } static void -nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +nv50_vp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + 
struct pipe_sampler_view **views) { - struct nv50_context *nv50 = nv50_context(pipe); - - nv50->rasterizer = hwcso; - nv50->dirty |= NV50_NEW_RASTERIZER; + nv50_stage_set_sampler_views(nv50_context(pipe), 0, nr, views); } static void -nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +nv50_fp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) { - struct nv50_rasterizer_stateobj *rso = hwcso; - - so_ref(NULL, &rso->so); - FREE(rso); + nv50_stage_set_sampler_views(nv50_context(pipe), 2, nr, views); } -static void * -nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; - struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); - struct nouveau_stateobj *so = so_new(9, 21, 0); - - so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); - so_data (so, cso->depth.writemask ? 1 : 0); - if (cso->depth.enabled) { - so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); - so_data (so, 1); - so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1); - so_data (so, nvgl_comparison_op(cso->depth.func)); - } else { - so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); - so_data (so, 0); - } - - if (cso->stencil[0].enabled) { - so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 5); - so_data (so, 1); - so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); - so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_method(so, tesla, NV50TCL_STENCIL_FRONT_MASK, 2); - so_data (so, cso->stencil[0].writemask); - so_data (so, cso->stencil[0].valuemask); - } else { - so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); - so_data (so, 0); - } - - if (cso->stencil[1].enabled) { - so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); - so_data (so, 1); - so_data (so, 
nvgl_stencil_op(cso->stencil[1].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); - so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, tesla, NV50TCL_STENCIL_BACK_MASK, 2); - so_data (so, cso->stencil[1].writemask); - so_data (so, cso->stencil[1].valuemask); - } else { - so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); - so_data (so, 0); - } - - if (cso->alpha.enabled) { - so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); - so_data (so, 1); - so_method(so, tesla, NV50TCL_ALPHA_TEST_REF, 2); - so_data (so, fui(cso->alpha.ref_value)); - so_data (so, nvgl_comparison_op(cso->alpha.func)); - } else { - so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); - so_data (so, 0); - } - - zsa->pipe = *cso; - so_ref(so, &zsa->so); - so_ref(NULL, &so); - return (void *)zsa; +static void +nv50_gp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nv50_stage_set_sampler_views(nv50_context(pipe), 1, nr, views); } -static void -nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +/* ============================= SHADERS ======================================= + */ + +static void * +nv50_sp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso, unsigned type) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *prog; - nv50->zsa = hwcso; - nv50->dirty |= NV50_NEW_ZSA; + prog = CALLOC_STRUCT(nv50_program); + if (!prog) + return NULL; + + prog->type = type; + prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + return (void *)prog; } static void -nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +nv50_sp_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nv50_zsa_stateobj *zsa = hwcso; + struct nv50_program *prog = (struct nv50_program *)hwcso; + + nv50_program_destroy(nv50_context(pipe), prog); - so_ref(NULL, &zsa->so); - 
FREE(zsa); + FREE((void *)prog->pipe.tokens); + FREE(prog); } static void * nv50_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) + const struct pipe_shader_state *cso) { - struct nv50_program *p = CALLOC_STRUCT(nv50_program); - - p->pipe.tokens = tgsi_dup_tokens(cso->tokens); - p->type = PIPE_SHADER_VERTEX; - return (void *)p; + return nv50_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX); } static void nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - nv50->vertprog = hwcso; - nv50->dirty |= NV50_NEW_VERTPROG; -} - -static void -nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv50_context *nv50 = nv50_context(pipe); - struct nv50_program *p = hwcso; - - nv50_program_destroy(nv50, p); - FREE((void *)p->pipe.tokens); - FREE(p); + nv50->vertprog = hwcso; + nv50->dirty |= NV50_NEW_VERTPROG; } static void * nv50_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) + const struct pipe_shader_state *cso) { - struct nv50_program *p = CALLOC_STRUCT(nv50_program); - - p->pipe.tokens = tgsi_dup_tokens(cso->tokens); - p->type = PIPE_SHADER_FRAGMENT; - return (void *)p; + return nv50_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT); } static void nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) { - struct nv50_context *nv50 = nv50_context(pipe); - - nv50->fragprog = hwcso; - nv50->dirty |= NV50_NEW_FRAGPROG; -} - -static void -nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv50_context *nv50 = nv50_context(pipe); - struct nv50_program *p = hwcso; + struct nv50_context *nv50 = nv50_context(pipe); - nv50_program_destroy(nv50, p); - FREE((void *)p->pipe.tokens); - FREE(p); + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_FRAGPROG; } static void * nv50_gp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) + const struct 
pipe_shader_state *cso) { - struct nv50_program *p = CALLOC_STRUCT(nv50_program); - - p->pipe.tokens = tgsi_dup_tokens(cso->tokens); - p->type = PIPE_SHADER_GEOMETRY; - return (void *)p; + return nv50_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY); } static void nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - nv50->geomprog = hwcso; - nv50->dirty |= NV50_NEW_GEOMPROG; + nv50->gmtyprog = hwcso; + nv50->dirty |= NV50_NEW_GMTYPROG; } static void -nv50_gp_state_delete(struct pipe_context *pipe, void *hwcso) +nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct pipe_resource *res) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nv50_program *p = hwcso; + struct nv50_context *nv50 = nv50_context(pipe); + + if (nv50->constbuf[shader][index]) + nv50_bufctx_del_resident(nv50, NV50_BUFCTX_CONSTANT, + nv04_resource(nv50->constbuf[shader][index])); - nv50_program_destroy(nv50, p); - FREE((void *)p->pipe.tokens); - FREE(p); + pipe_resource_reference(&nv50->constbuf[shader][index], res); + + nv50->constbuf_dirty[shader] |= 1 << index; + + nv50->dirty |= NV50_NEW_CONSTBUF; } +/* ============================================================================= + */ + static void nv50_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) + const struct pipe_blend_color *bcol) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - nv50->blend_colour = *bcol; - nv50->dirty |= NV50_NEW_BLEND_COLOUR; + nv50->blend_colour = *bcol; + nv50->dirty |= NV50_NEW_BLEND_COLOUR; } - static void +static void nv50_set_stencil_ref(struct pipe_context *pipe, - const struct pipe_stencil_ref *sr) + const struct pipe_stencil_ref *sr) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - nv50->stencil_ref = *sr; - 
nv50->dirty |= NV50_NEW_STENCIL_REF; + nv50->stencil_ref = *sr; + nv50->dirty |= NV50_NEW_STENCIL_REF; } static void nv50_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) + const struct pipe_clip_state *clip) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); + const unsigned size = clip->nr * sizeof(clip->ucp[0]); - nv50->clip.depth_clamp = clip->depth_clamp; - nv50->dirty |= NV50_NEW_CLIP; -} + memcpy(&nv50->clip.ucp[0][0], &clip->ucp[0][0], size); + nv50->clip.nr = clip->nr; -static void -nv50_set_sample_mask(struct pipe_context *pipe, - unsigned sample_mask) -{ + nv50->clip.depth_clamp = clip->depth_clamp; + + nv50->dirty |= NV50_NEW_CLIP; } static void -nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_resource *buf ) +nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - if (shader == PIPE_SHADER_VERTEX) { - nv50->constbuf[PIPE_SHADER_VERTEX] = buf; - nv50->dirty |= NV50_NEW_VERTPROG_CB; - } else - if (shader == PIPE_SHADER_FRAGMENT) { - nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf; - nv50->dirty |= NV50_NEW_FRAGPROG_CB; - } else - if (shader == PIPE_SHADER_GEOMETRY) { - nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf; - nv50->dirty |= NV50_NEW_GEOMPROG_CB; - } + nv50->sample_mask = sample_mask; + nv50->dirty |= NV50_NEW_SAMPLE_MASK; } + static void nv50_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) + const struct pipe_framebuffer_state *fb) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - nv50->framebuffer = *fb; - nv50->dirty |= NV50_NEW_FRAMEBUFFER; + nv50->framebuffer = *fb; + nv50->dirty |= NV50_NEW_FRAMEBUFFER; } static void nv50_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) + 
const struct pipe_poly_stipple *stipple) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - nv50->stipple = *stipple; - nv50->dirty |= NV50_NEW_STIPPLE; + nv50->stipple = *stipple; + nv50->dirty |= NV50_NEW_STIPPLE; } static void nv50_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) + const struct pipe_scissor_state *scissor) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - nv50->scissor = *s; - nv50->dirty |= NV50_NEW_SCISSOR; + nv50->scissor = *scissor; + nv50->dirty |= NV50_NEW_SCISSOR; } static void nv50_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *vpt) + const struct pipe_viewport_state *vpt) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - nv50->viewport = *vpt; - nv50->dirty |= NV50_NEW_VIEWPORT; + nv50->viewport = *vpt; + nv50->dirty |= NV50_NEW_VIEWPORT; } static void -nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_buffer *vb) +nv50_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *vb) { - struct nv50_context *nv50 = nv50_context(pipe); - - memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); - nv50->vtxbuf_nr = count; + struct nv50_context *nv50 = nv50_context(pipe); + unsigned i; - nv50->dirty |= NV50_NEW_ARRAYS; -} + for (i = 0; i < count; ++i) + pipe_resource_reference(&nv50->vtxbuf[i].buffer, vb[i].buffer); + for (; i < nv50->num_vtxbufs; ++i) + pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL); -static void -nv50_set_index_buffer(struct pipe_context *pipe, - const struct pipe_index_buffer *ib) -{ - struct nv50_context *nv50 = nv50_context(pipe); + memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); + nv50->num_vtxbufs = count; - if (ib) - memcpy(&nv50->idxbuf, ib, sizeof(nv50->idxbuf)); - else - memset(&nv50->idxbuf, 0, 
sizeof(nv50->idxbuf)); + nv50_bufctx_reset(nv50, NV50_BUFCTX_VERTEX); - /* TODO make this more like a state */ + nv50->dirty |= NV50_NEW_ARRAYS; } -static void * -nv50_vtxelts_state_create(struct pipe_context *pipe, - unsigned num_elements, - const struct pipe_vertex_element *elements) +static void +nv50_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) { - struct nv50_vtxelt_stateobj *cso = CALLOC_STRUCT(nv50_vtxelt_stateobj); - - assert(num_elements < 16); /* not doing fallbacks yet */ - cso->num_elements = num_elements; - memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); - - nv50_vtxelt_construct(cso); + struct nv50_context *nv50 = nv50_context(pipe); - return (void *)cso; -} + if (ib) { + pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer); -static void -nv50_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); + memcpy(&nv50->idxbuf, ib, sizeof(nv50->idxbuf)); + } else { + pipe_resource_reference(&nv50->idxbuf.buffer, NULL); + } } static void -nv50_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) +nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso) { - struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_context *nv50 = nv50_context(pipe); - nv50->vtxelt = hwcso; - nv50->dirty |= NV50_NEW_ARRAYS; + nv50->vertex = hwcso; + nv50->dirty |= NV50_NEW_VERTEX; } void nv50_init_state_functions(struct nv50_context *nv50) { - nv50->pipe.create_blend_state = nv50_blend_state_create; - nv50->pipe.bind_blend_state = nv50_blend_state_bind; - nv50->pipe.delete_blend_state = nv50_blend_state_delete; - - nv50->pipe.create_sampler_state = nv50_sampler_state_create; - nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; - nv50->pipe.bind_fragment_sampler_states = nv50_fp_sampler_state_bind; - nv50->pipe.bind_vertex_sampler_states = nv50_vp_sampler_state_bind; - nv50->pipe.set_fragment_sampler_views = nv50_set_fp_sampler_views; - nv50->pipe.set_vertex_sampler_views = 
nv50_set_vp_sampler_views; - nv50->pipe.create_sampler_view = nv50_create_sampler_view; - nv50->pipe.sampler_view_destroy = nv50_sampler_view_destroy; - - nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; - nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; - nv50->pipe.delete_rasterizer_state = nv50_rasterizer_state_delete; - - nv50->pipe.create_depth_stencil_alpha_state = - nv50_depth_stencil_alpha_state_create; - nv50->pipe.bind_depth_stencil_alpha_state = - nv50_depth_stencil_alpha_state_bind; - nv50->pipe.delete_depth_stencil_alpha_state = - nv50_depth_stencil_alpha_state_delete; - - nv50->pipe.create_vs_state = nv50_vp_state_create; - nv50->pipe.bind_vs_state = nv50_vp_state_bind; - nv50->pipe.delete_vs_state = nv50_vp_state_delete; - - nv50->pipe.create_fs_state = nv50_fp_state_create; - nv50->pipe.bind_fs_state = nv50_fp_state_bind; - nv50->pipe.delete_fs_state = nv50_fp_state_delete; - - nv50->pipe.create_gs_state = nv50_gp_state_create; - nv50->pipe.bind_gs_state = nv50_gp_state_bind; - nv50->pipe.delete_gs_state = nv50_gp_state_delete; - - nv50->pipe.set_blend_color = nv50_set_blend_color; - nv50->pipe.set_stencil_ref = nv50_set_stencil_ref; - nv50->pipe.set_clip_state = nv50_set_clip_state; - nv50->pipe.set_sample_mask = nv50_set_sample_mask; - nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; - nv50->pipe.set_framebuffer_state = nv50_set_framebuffer_state; - nv50->pipe.set_polygon_stipple = nv50_set_polygon_stipple; - nv50->pipe.set_scissor_state = nv50_set_scissor_state; - nv50->pipe.set_viewport_state = nv50_set_viewport_state; - - nv50->pipe.create_vertex_elements_state = nv50_vtxelts_state_create; - nv50->pipe.delete_vertex_elements_state = nv50_vtxelts_state_delete; - nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind; - - nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; - nv50->pipe.set_index_buffer = nv50_set_index_buffer; + struct pipe_context *pipe = &nv50->base.pipe; + + 
pipe->create_blend_state = nv50_blend_state_create; + pipe->bind_blend_state = nv50_blend_state_bind; + pipe->delete_blend_state = nv50_blend_state_delete; + + pipe->create_rasterizer_state = nv50_rasterizer_state_create; + pipe->bind_rasterizer_state = nv50_rasterizer_state_bind; + pipe->delete_rasterizer_state = nv50_rasterizer_state_delete; + + pipe->create_depth_stencil_alpha_state = nv50_zsa_state_create; + pipe->bind_depth_stencil_alpha_state = nv50_zsa_state_bind; + pipe->delete_depth_stencil_alpha_state = nv50_zsa_state_delete; + + pipe->create_sampler_state = nv50_sampler_state_create; + pipe->delete_sampler_state = nv50_sampler_state_delete; + pipe->bind_vertex_sampler_states = nv50_vp_sampler_states_bind; + pipe->bind_fragment_sampler_states = nv50_fp_sampler_states_bind; + pipe->bind_geometry_sampler_states = nv50_gp_sampler_states_bind; + + pipe->create_sampler_view = nv50_create_sampler_view; + pipe->sampler_view_destroy = nv50_sampler_view_destroy; + pipe->set_vertex_sampler_views = nv50_vp_set_sampler_views; + pipe->set_fragment_sampler_views = nv50_fp_set_sampler_views; + pipe->set_geometry_sampler_views = nv50_gp_set_sampler_views; + + pipe->create_vs_state = nv50_vp_state_create; + pipe->create_fs_state = nv50_fp_state_create; + pipe->create_gs_state = nv50_gp_state_create; + pipe->bind_vs_state = nv50_vp_state_bind; + pipe->bind_fs_state = nv50_fp_state_bind; + pipe->bind_gs_state = nv50_gp_state_bind; + pipe->delete_vs_state = nv50_sp_state_delete; + pipe->delete_fs_state = nv50_sp_state_delete; + pipe->delete_gs_state = nv50_sp_state_delete; + + pipe->set_blend_color = nv50_set_blend_color; + pipe->set_stencil_ref = nv50_set_stencil_ref; + pipe->set_clip_state = nv50_set_clip_state; + pipe->set_sample_mask = nv50_set_sample_mask; + pipe->set_constant_buffer = nv50_set_constant_buffer; + pipe->set_framebuffer_state = nv50_set_framebuffer_state; + pipe->set_polygon_stipple = nv50_set_polygon_stipple; + pipe->set_scissor_state = 
nv50_set_scissor_state; + pipe->set_viewport_state = nv50_set_viewport_state; + + pipe->create_vertex_elements_state = nv50_vertex_state_create; + pipe->delete_vertex_elements_state = nv50_vertex_state_delete; + pipe->bind_vertex_elements_state = nv50_vertex_state_bind; + + pipe->set_vertex_buffers = nv50_set_vertex_buffers; + pipe->set_index_buffer = nv50_set_index_buffer; + + pipe->redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index ae02143e35..f3d45eb95e 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -1,449 +1,342 @@ -/* - * Copyright 2008 Ben Skeggs - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "util/u_format.h" #include "nv50_context.h" -#include "nv50_resource.h" -#include "nouveau/nouveau_stateobj.h" +#include "os/os_time.h" -static struct nouveau_stateobj * -validate_fb(struct nv50_context *nv50) +static void +nv50_validate_fb(struct nv50_context *nv50) { - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(32, 79, 18); - struct pipe_framebuffer_state *fb = &nv50->framebuffer; - unsigned i, w = 0, h = 0, gw = 0; - - /* Set nr of active RTs and select RT for each colour output. - * FP result 0 always goes to RT[0], bits 4 - 6 are ignored. - * Ambiguous assignment results in no rendering (no DATA_ERROR). - */ - so_method(so, tesla, NV50TCL_RT_CONTROL, 1); - so_data (so, fb->nr_cbufs | - (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) | - (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25)); - - for (i = 0; i < fb->nr_cbufs; i++) { - struct pipe_resource *pt = fb->cbufs[i]->texture; - struct nouveau_bo *bo = nv50_miptree(pt)->base.bo; - - if (!gw) { - w = fb->cbufs[i]->width; - h = fb->cbufs[i]->height; - gw = 1; - } else { - assert(w == fb->cbufs[i]->width); - assert(h == fb->cbufs[i]->height); - } - - assert(nv50_format_table[fb->cbufs[i]->format].rt); - - so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2); - so_data (so, fb->cbufs[i]->width); - so_data (so, fb->cbufs[i]->height); - - so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); - so_reloc (so, bo, ((struct nv50_surface *)fb->cbufs[i])->offset, NOUVEAU_BO_VRAM | - NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0); - so_reloc (so, bo, ((struct nv50_surface *)fb->cbufs[i])->offset, NOUVEAU_BO_VRAM | - NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); - so_data (so, nv50_format_table[fb->cbufs[i]->format].rt); - so_data (so, nv50_miptree(pt)-> - level[fb->cbufs[i]->u.tex.level].tile_mode << 4); - so_data(so, 0x00000000); - - so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1); - so_data (so, 1); - } - - if (fb->zsbuf) { - struct pipe_resource *pt = fb->zsbuf->texture; - struct 
nouveau_bo *bo = nv50_miptree(pt)->base.bo; - - if (!gw) { - w = fb->zsbuf->width; - h = fb->zsbuf->height; - gw = 1; - } else { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } - - assert(nv50_format_table[fb->zsbuf->format].rt); - - so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); - so_reloc (so, bo, ((struct nv50_surface *)(fb->zsbuf))->offset, NOUVEAU_BO_VRAM | - NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0); - so_reloc (so, bo, ((struct nv50_surface *)(fb->zsbuf))->offset, NOUVEAU_BO_VRAM | - NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); - so_data (so, nv50_format_table[fb->zsbuf->format].rt); - so_data (so, nv50_miptree(pt)-> - level[fb->zsbuf->u.tex.level].tile_mode << 4); - so_data (so, 0x00000000); - - so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); - so_data (so, 1); - so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3); - so_data (so, fb->zsbuf->width); - so_data (so, fb->zsbuf->height); - so_data (so, 0x00010001); - } else { - so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); - so_data (so, w << 16); - so_data (so, h << 16); - /* set window lower left corner */ - so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2); - so_data (so, 0); - so_data (so, 0); - /* set screen scissor rectangle */ - so_method(so, tesla, NV50TCL_SCREEN_SCISSOR_HORIZ, 2); - so_data (so, w << 16); - so_data (so, h << 16); - - return so; + struct nouveau_channel *chan = nv50->screen->base.channel; + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + unsigned i; + boolean serialize = FALSE; + + nv50_bufctx_reset(nv50, NV50_BUFCTX_FRAME); + + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); + OUT_RING (chan, (076543210 << 4) | fb->nr_cbufs); + BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2); + OUT_RING (chan, fb->width << 16); + OUT_RING (chan, fb->height << 16); + + MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs); + + for (i = 0; i < fb->nr_cbufs; ++i) { + struct nv50_miptree *mt = 
nv50_miptree(fb->cbufs[i]->texture); + struct nv50_surface *sf = nv50_surface(fb->cbufs[i]); + struct nouveau_bo *bo = mt->base.bo; + uint32_t offset = sf->offset; + + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 5); + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, nv50_format_table[sf->base.format].rt); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode << 4); + OUT_RING (chan, mt->layer_stride >> 2); + BEGIN_RING(chan, RING_3D(RT_HORIZ(i)), 2); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + BEGIN_RING(chan, RING_3D(RT_ARRAY_MODE), 1); + OUT_RING (chan, sf->depth); + + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= NOUVEAU_BUFFER_STATUS_GPU_READING; + + /* only register for writing, otherwise we'd always serialize here */ + nv50_bufctx_add_resident(nv50, NV50_BUFCTX_FRAME, &mt->base, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } + + if (fb->zsbuf) { + struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture); + struct nv50_surface *sf = nv50_surface(fb->zsbuf); + struct nouveau_bo *bo = mt->base.bo; + int unk = mt->base.base.target == PIPE_TEXTURE_2D; + uint32_t offset = sf->offset; + + MARK_RING (chan, 12, 2); + BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, nv50_format_table[fb->zsbuf->format].rt); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode << 4); + OUT_RING (chan, mt->layer_stride >> 2); + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, (unk << 16) | sf->depth); + + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) + 
serialize = TRUE; + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nv50_bufctx_add_resident(nv50, NV50_BUFCTX_FRAME, &mt->base, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } else { + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); + OUT_RING (chan, 0); + } + + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, fb->width << 16); + OUT_RING (chan, fb->height << 16); + + if (serialize) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); + } } static void -nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so, - unsigned p) +nv50_validate_blend_colour(struct nv50_context *nv50) { - struct nouveau_grobj *eng2d = nv50->screen->eng2d; - unsigned i, j, dw = nv50->sampler_nr[p] * 8; - - if (!dw) - return; - nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM, - p * (32 * 8 * 4), dw * 4); - - so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), dw); - - for (i = 0; i < nv50->sampler_nr[p]; ++i) { - if (nv50->sampler[p][i]) - so_datap(so, nv50->sampler[p][i]->tsc, 8); - else { - for (j = 0; j < 8; ++j) /* you get punished */ - so_data(so, 0); /* ... 
for leaving holes */ - } - } -} + struct nouveau_channel *chan = nv50->screen->base.channel; -static struct nouveau_stateobj * -validate_blend(struct nv50_context *nv50) -{ - struct nouveau_stateobj *so = NULL; - so_ref(nv50->blend->so, &so); - return so; + BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4); + OUT_RINGf (chan, nv50->blend_colour.color[0]); + OUT_RINGf (chan, nv50->blend_colour.color[1]); + OUT_RINGf (chan, nv50->blend_colour.color[2]); + OUT_RINGf (chan, nv50->blend_colour.color[3]); } -static struct nouveau_stateobj * -validate_zsa(struct nv50_context *nv50) +static void +nv50_validate_stencil_ref(struct nv50_context *nv50) { - struct nouveau_stateobj *so = NULL; - so_ref(nv50->zsa->so, &so); - return so; -} + struct nouveau_channel *chan = nv50->screen->base.channel; -static struct nouveau_stateobj * -validate_rast(struct nv50_context *nv50) -{ - struct nouveau_stateobj *so = NULL; - so_ref(nv50->rasterizer->so, &so); - return so; + BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1); + OUT_RING (chan, nv50->stencil_ref.ref_value[0]); + BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1); + OUT_RING (chan, nv50->stencil_ref.ref_value[1]); } -static struct nouveau_stateobj * -validate_blend_colour(struct nv50_context *nv50) +static void +nv50_validate_stipple(struct nv50_context *nv50) { - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(1, 4, 0); - - so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); - so_data (so, fui(nv50->blend_colour.color[0])); - so_data (so, fui(nv50->blend_colour.color[1])); - so_data (so, fui(nv50->blend_colour.color[2])); - so_data (so, fui(nv50->blend_colour.color[3])); - return so; -} + struct nouveau_channel *chan = nv50->screen->base.channel; + unsigned i; -static struct nouveau_stateobj * -validate_stencil_ref(struct nv50_context *nv50) -{ - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(2, 2, 0); - - so_method(so, tesla, 
NV50TCL_STENCIL_FRONT_FUNC_REF, 1); - so_data (so, nv50->stencil_ref.ref_value[0]); - so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1); - so_data (so, nv50->stencil_ref.ref_value[1]); - return so; + BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32); + for (i = 0; i < 32; ++i) + OUT_RING(chan, util_bswap32(nv50->stipple.stipple[i])); } -static struct nouveau_stateobj * -validate_stipple(struct nv50_context *nv50) +static void +nv50_validate_scissor(struct nv50_context *nv50) { - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(1, 32, 0); - int i; - - so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, util_bswap32(nv50->stipple.stipple[i])); - return so; + struct nouveau_channel *chan = nv50->screen->base.channel; + struct pipe_scissor_state *s = &nv50->scissor; +#ifdef NV50_SCISSORS_CLIPPING + struct pipe_viewport_state *vp = &nv50->viewport; + int minx, maxx, miny, maxy; + + if (!(nv50->dirty & + (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | NV50_NEW_FRAMEBUFFER)) && + nv50->state.scissor == nv50->rast->pipe.scissor) + return; + nv50->state.scissor = nv50->rast->pipe.scissor; + + if (nv50->state.scissor) { + minx = s->minx; + maxx = s->maxx; + miny = s->miny; + maxy = s->maxy; + } else { + minx = 0; + maxx = nv50->framebuffer.width; + miny = 0; + maxy = nv50->framebuffer.height; + } + + minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0]))); + maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0]))); + miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1]))); + maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1]))); + + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + OUT_RING (chan, (maxx << 16) | minx); + OUT_RING (chan, (maxy << 16) | miny); +#else + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + OUT_RING (chan, (s->maxx << 16) | s->minx); + OUT_RING (chan, (s->maxy << 16) | s->miny); +#endif } -static struct 
nouveau_stateobj * -validate_scissor(struct nv50_context *nv50) +static void +nv50_validate_viewport(struct nv50_context *nv50) { - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct pipe_scissor_state *s = &nv50->scissor; - struct nouveau_stateobj *so; - - so = so_new(1, 2, 0); - so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); - so_data (so, (s->maxx << 16) | s->minx); - so_data (so, (s->maxy << 16) | s->miny); - return so; + struct nouveau_channel *chan = nv50->screen->base.channel; + float zmin, zmax; + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3); + OUT_RINGf (chan, nv50->viewport.translate[0]); + OUT_RINGf (chan, nv50->viewport.translate[1]); + OUT_RINGf (chan, nv50->viewport.translate[2]); + BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3); + OUT_RINGf (chan, nv50->viewport.scale[0]); + OUT_RINGf (chan, nv50->viewport.scale[1]); + OUT_RINGf (chan, nv50->viewport.scale[2]); + + zmin = nv50->viewport.translate[2] - fabsf(nv50->viewport.scale[2]); + zmax = nv50->viewport.translate[2] + fabsf(nv50->viewport.scale[2]); + +#ifdef NV50_SCISSORS_CLIPPING + BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); + OUT_RINGf (chan, zmin); + OUT_RINGf (chan, zmax); +#endif } -static struct nouveau_stateobj * -validate_viewport(struct nv50_context *nv50) +static void +nv50_validate_clip(struct nv50_context *nv50) { - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(3, 7, 0); - - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); - so_data (so, fui(nv50->viewport.translate[0])); - so_data (so, fui(nv50->viewport.translate[1])); - so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); - so_data (so, fui(nv50->viewport.scale[0])); - so_data (so, fui(nv50->viewport.scale[1])); - so_data (so, fui(nv50->viewport.scale[2])); - - /* no idea what 0f90 does */ - so_method(so, tesla, 0x0f90, 1); - so_data (so, 0); - - return so; + struct nouveau_channel *chan = 
nv50->screen->base.channel; + uint32_t clip; + + if (nv50->clip.depth_clamp) { + clip = + NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR | + NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR | + NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1; + } else { + clip = 0; + } + +#ifndef NV50_SCISSORS_CLIPPING + clip |= + NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 | + NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1; +#endif + + BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); + OUT_RING (chan, clip); + + if (nv50->clip.nr) { + BEGIN_RING(chan, RING_3D(CB_ADDR), 1); + OUT_RING (chan, (0 << 8) | NV50_CB_AUX); + BEGIN_RING_NI(chan, RING_3D(CB_DATA(0)), nv50->clip.nr * 4); + OUT_RINGp (chan, &nv50->clip.ucp[0][0], nv50->clip.nr * 4); + } + + BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1); + OUT_RING (chan, (1 << nv50->clip.nr) - 1); } -static struct nouveau_stateobj * -validate_sampler(struct nv50_context *nv50) +static void +nv50_validate_blend(struct nv50_context *nv50) { - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so; - unsigned nr = 0, i; - - for (i = 0; i < 3; ++i) - nr += nv50->sampler_nr[i]; - - so = so_new(1 + 5 * 3, 1 + 19 * 3 + nr * 8, 3 * 2); - - nv50_validate_samplers(nv50, so, 0); /* VP */ - nv50_validate_samplers(nv50, so, 2); /* FP */ + struct nouveau_channel *chan = nv50->screen->base.channel; - so_method(so, tesla, 0x1334, 1); /* flush TSC */ - so_data (so, 0); - - return so; + WAIT_RING(chan, nv50->blend->size); + OUT_RINGp(chan, nv50->blend->state, nv50->blend->size); } -static struct nouveau_stateobj * -validate_vtxbuf(struct nv50_context *nv50) +static void +nv50_validate_zsa(struct nv50_context *nv50) { - struct nouveau_stateobj *so = NULL; - so_ref(nv50->state.vtxbuf, &so); - return so; + struct nouveau_channel *chan = nv50->screen->base.channel; + + WAIT_RING(chan, nv50->zsa->size); + OUT_RINGp(chan, nv50->zsa->state, nv50->zsa->size); } -static struct nouveau_stateobj * -validate_vtxattr(struct nv50_context *nv50) +static void 
+nv50_validate_rasterizer(struct nv50_context *nv50) { - struct nouveau_stateobj *so = NULL; - so_ref(nv50->state.vtxattr, &so); - return so; + struct nouveau_channel *chan = nv50->screen->base.channel; + + WAIT_RING(chan, nv50->rast->size); + OUT_RINGp(chan, nv50->rast->state, nv50->rast->size); } -static struct nouveau_stateobj * -validate_clip(struct nv50_context *nv50) +static void +nv50_switch_pipe_context(struct nv50_context *ctx_to) { - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(1, 1, 0); - uint32_t vvcc; + struct nv50_context *ctx_from = ctx_to->screen->cur_ctx; + + if (ctx_from) + ctx_to->state = ctx_from->state; + + ctx_to->dirty = ~0; - /* 0x0000 = remove whole primitive only (xyz) - * 0x1018 = remove whole primitive only (xy), clamp z - * 0x1080 = clip primitive (xyz) - * 0x1098 = clip primitive (xy), clamp z - */ - vvcc = nv50->clip.depth_clamp ? 0x1098 : 0x1080; + if (!ctx_to->vertex) + ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS); - so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); - so_data (so, vvcc); + if (!ctx_to->vertprog) + ctx_to->dirty &= ~NV50_NEW_VERTPROG; + if (!ctx_to->fragprog) + ctx_to->dirty &= ~NV50_NEW_FRAGPROG; - return so; + if (!ctx_to->blend) + ctx_to->dirty &= ~NV50_NEW_BLEND; + if (!ctx_to->rast) + ctx_to->dirty &= ~NV50_NEW_RASTERIZER; + if (!ctx_to->zsa) + ctx_to->dirty &= ~NV50_NEW_ZSA; + + ctx_to->screen->base.channel->user_private = ctx_to->screen->cur_ctx = + ctx_to; } -struct state_validate { - struct nouveau_stateobj *(*func)(struct nv50_context *nv50); - unsigned states; +static struct state_validate { + void (*func)(struct nv50_context *); + uint32_t states; } validate_list[] = { - { validate_fb , NV50_NEW_FRAMEBUFFER }, - { validate_blend , NV50_NEW_BLEND }, - { validate_zsa , NV50_NEW_ZSA }, - { nv50_vertprog_validate , NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB }, - { nv50_fragprog_validate , NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB }, - { 
nv50_geomprog_validate , NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB }, - { nv50_fp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG | - NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER }, - { nv50_gp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG }, - { validate_rast , NV50_NEW_RASTERIZER }, - { validate_blend_colour , NV50_NEW_BLEND_COLOUR }, - { validate_stencil_ref , NV50_NEW_STENCIL_REF }, - { validate_stipple , NV50_NEW_STIPPLE }, - { validate_scissor , NV50_NEW_SCISSOR }, - { validate_viewport , NV50_NEW_VIEWPORT }, - { validate_sampler , NV50_NEW_SAMPLER }, - { nv50_tex_validate , NV50_NEW_TEXTURE | NV50_NEW_SAMPLER }, - { nv50_vbo_validate , NV50_NEW_ARRAYS }, - { validate_vtxbuf , NV50_NEW_ARRAYS }, - { validate_vtxattr , NV50_NEW_ARRAYS }, - { validate_clip , NV50_NEW_CLIP }, - { NULL , 0 } + { nv50_validate_fb, NV50_NEW_FRAMEBUFFER }, + { nv50_validate_blend, NV50_NEW_BLEND }, + { nv50_validate_zsa, NV50_NEW_ZSA }, + { nv50_validate_rasterizer, NV50_NEW_RASTERIZER }, + { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR }, + { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF }, + { nv50_validate_stipple, NV50_NEW_STIPPLE }, +#ifdef NV50_SCISSORS_CLIPPING + { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | + NV50_NEW_RASTERIZER | + NV50_NEW_FRAMEBUFFER }, +#else + { nv50_validate_scissor, NV50_NEW_SCISSOR }, +#endif + { nv50_validate_viewport, NV50_NEW_VIEWPORT }, + { nv50_validate_clip, NV50_NEW_CLIP }, + { nv50_vertprog_validate, NV50_NEW_VERTPROG }, + { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG }, + { nv50_fragprog_validate, NV50_NEW_FRAGPROG }, + { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | + NV50_NEW_GMTYPROG }, + { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG }, + { nv50_sprite_coords_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, + { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, + { nv50_validate_textures, NV50_NEW_TEXTURES }, + { 
nv50_validate_samplers, NV50_NEW_SAMPLERS }, + { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) boolean -nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords) +nv50_state_validate(struct nv50_context *nv50) { - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - unsigned nr_relocs = 128, nr_dwords = wait_dwords + 128 + 4; - int ret, i; - - for (i = 0; i < validate_list_len; i++) { - struct state_validate *validate = &validate_list[i]; - struct nouveau_stateobj *so; - - if (!(nv50->dirty & validate->states)) - continue; - - so = validate->func(nv50); - if (!so) - continue; - - nr_dwords += (so->total + so->cur); - nr_relocs += so->cur_reloc; - - so_ref(so, &nv50->state.hw[i]); - so_ref(NULL, &so); - nv50->state.hw_dirty |= (1 << i); - } - nv50->dirty = 0; - - if (nv50->screen->cur_ctx != nv50) { - for (i = 0; i < validate_list_len; i++) { - if (!nv50->state.hw[i] || - (nv50->state.hw_dirty & (1 << i))) - continue; - - nr_dwords += (nv50->state.hw[i]->total + - nv50->state.hw[i]->cur); - nr_relocs += nv50->state.hw[i]->cur_reloc; - nv50->state.hw_dirty |= (1 << i); - } - - nv50->screen->cur_ctx = nv50; - } - - ret = MARK_RING(chan, nr_dwords, nr_relocs); - if (ret) { - debug_printf("MARK_RING(%d, %d) failed: %d\n", - nr_dwords, nr_relocs, ret); - return FALSE; - } - - while (nv50->state.hw_dirty) { - i = ffs(nv50->state.hw_dirty) - 1; - nv50->state.hw_dirty &= ~(1 << i); - - so_emit(chan, nv50->state.hw[i]); - } - - /* Yes, really, we need to do this. If a buffer that is referenced - * on the hardware isn't part of changed state above, without doing - * this the kernel is given no clue that the buffer is being used - * still. This can cause all sorts of fun issues. 
- */ - nv50_tex_relocs(nv50); - so_emit_reloc_markers(chan, nv50->state.hw[0]); /* fb */ - so_emit_reloc_markers(chan, nv50->state.hw[3]); /* vp */ - so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */ - so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */ - nv50_screen_relocs(nv50->screen); - - /* No idea.. */ - BEGIN_RING(chan, tesla, 0x142c, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, tesla, 0x142c, 1); - OUT_RING (chan, 0); - return TRUE; -} + unsigned i; -void nv50_so_init_sifc(struct nv50_context *nv50, - struct nouveau_stateobj *so, - struct nouveau_bo *bo, unsigned reloc, - unsigned offset, unsigned size) -{ - struct nouveau_grobj *eng2d = nv50->screen->eng2d; - - reloc |= NOUVEAU_BO_WR; - - so_method(so, eng2d, NV50_2D_DST_FORMAT, 2); - so_data (so, NV50_2D_DST_FORMAT_R8_UNORM); - so_data (so, 1); - so_method(so, eng2d, NV50_2D_DST_PITCH, 5); - so_data (so, 262144); - so_data (so, 65536); - so_data (so, 1); - so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); - so_data (so, 0); - so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM); - so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10); - so_data (so, size); - so_data (so, 1); - so_data (so, 0); - so_data (so, 1); - so_data (so, 0); - so_data (so, 1); - so_data (so, 0); - so_data (so, 0); - so_data (so, 0); - so_data (so, 0); + if (nv50->screen->cur_ctx != nv50) + nv50_switch_pipe_context(nv50); + + if (nv50->dirty) { + for (i = 0; i < validate_list_len; ++i) { + struct state_validate *validate = &validate_list[i]; + + if (nv50->dirty & validate->states) + validate->func(nv50); + } + nv50->dirty = 0; + } + + nv50_bufctx_emit_relocs(nv50); + + return TRUE; } diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h new file mode 100644 index 0000000000..515e3e78d4 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_stateobj.h @@ -0,0 +1,55 @@ + +#ifndef 
__NV50_STATEOBJ_H__ +#define __NV50_STATEOBJ_H__ + +#include "pipe/p_state.h" + +#define NV50_SCISSORS_CLIPPING + +#define SB_BEGIN_3D(so, m, s) \ + (so)->state[(so)->size++] = \ + ((s) << 18) | (NV50_SUBCH_3D << 13) | NV50_3D_##m + +#define SB_BEGIN_3D_(so, m, s) \ + (so)->state[(so)->size++] = \ + ((s) << 18) | (NV50_SUBCH_3D << 13) | m + +#define SB_DATA(so, u) (so)->state[(so)->size++] = (u) + +#include "nv50_stateobj_tex.h" + +struct nv50_blend_stateobj { + struct pipe_blend_state pipe; + int size; + uint32_t state[78]; +}; + +struct nv50_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + int size; + uint32_t state[40]; +}; + +struct nv50_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + int size; + uint32_t state[29]; +}; + +struct nv50_vertex_element { + struct pipe_vertex_element pipe; + uint32_t state; +}; + +struct nv50_vertex_stateobj { + struct translate *translate; + unsigned num_elements; + uint32_t instance_elts; + uint32_t instance_bufs; + boolean need_conversion; + unsigned vertex_size; + unsigned packet_vertex_limit; + struct nv50_vertex_element element[0]; +}; + +#endif diff --git a/src/gallium/drivers/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nv50/nv50_stateobj_tex.h new file mode 100644 index 0000000000..99548cbdb4 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_stateobj_tex.h @@ -0,0 +1,34 @@ + +#ifndef __NV50_STATEOBJ_TEX_H__ +#define __NV50_STATEOBJ_TEX_H__ + +#include "pipe/p_state.h" + +struct nv50_tsc_entry { + int id; + uint32_t tsc[8]; +}; + +static INLINE struct nv50_tsc_entry * +nv50_tsc_entry(void *hwcso) +{ + return (struct nv50_tsc_entry *)hwcso; +} + +struct nv50_tic_entry { + struct pipe_sampler_view pipe; + int id; + uint32_t tic[8]; +}; + +static INLINE struct nv50_tic_entry * +nv50_tic_entry(struct pipe_sampler_view *view) +{ + return (struct nv50_tic_entry *)view; +} + +extern void * +nv50_sampler_state_create(struct pipe_context *, + const struct pipe_sampler_state *); + +#endif /* 
__NV50_STATEOBJ_TEX_H__ */ diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index ce48022db4..dc9e2880f0 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -20,306 +20,362 @@ * SOFTWARE. */ -#define __NOUVEAU_PUSH_H__ #include <stdint.h> -#include "nouveau/nouveau_pushbuf.h" -#include "nv50_context.h" -#include "nv50_resource.h" + #include "pipe/p_defines.h" + #include "util/u_inlines.h" #include "util/u_pack_color.h" - #include "util/u_format.h" +#include "nv50_context.h" +#include "nv50_resource.h" + +#include "nv50_defs.xml.h" + /* return TRUE for formats that can be converted among each other by NV50_2D */ static INLINE boolean nv50_2d_format_faithful(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8X8_SRGB: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - return TRUE; - default: - return FALSE; - } + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8X8_SRGB: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + return TRUE; + default: + return FALSE; + } } static INLINE uint8_t nv50_2d_format(enum pipe_format format) { - uint8_t id = nv50_format_table[format].rt; - - /* Hardware values for color formats range from 0xc0 to 0xff, - * but the 2D engine doesn't support all of them. 
- */ - if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) - return id; - - switch (util_format_get_blocksize(format)) { - case 1: - return NV50_2D_DST_FORMAT_R8_UNORM; - case 2: - return NV50_2D_DST_FORMAT_R16_UNORM; - case 4: - return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM; - default: - return 0; - } + uint8_t id = nv50_format_table[format].rt; + + /* Hardware values for color formats range from 0xc0 to 0xff, + * but the 2D engine doesn't support all of them. + */ + if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) + return id; + + switch (util_format_get_blocksize(format)) { + case 1: + return NV50_SURFACE_FORMAT_R8_UNORM; + case 2: + return NV50_SURFACE_FORMAT_R16_UNORM; + case 4: + return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM; + default: + return 0; + } } static int -nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) +nv50_2d_texture_set(struct nouveau_channel *chan, int dst, + struct nv50_miptree *mt, unsigned level, unsigned layer) { - struct nv50_miptree *mt = nv50_miptree(ps->texture); - struct nouveau_channel *chan = screen->eng2d->channel; - struct nouveau_grobj *eng2d = screen->eng2d; - struct nouveau_bo *bo = nv50_miptree(ps->texture)->base.bo; - int format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; - int flags = NOUVEAU_BO_VRAM | (dst ? 
NOUVEAU_BO_WR : NOUVEAU_BO_RD); - - format = nv50_2d_format(ps->format); - if (!format) { - NOUVEAU_ERR("invalid/unsupported surface format: %s\n", - util_format_name(ps->format)); - return 1; - } - - if (!nouveau_bo_tile_layout(bo)) { - BEGIN_RING(chan, eng2d, mthd, 2); - OUT_RING (chan, format); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng2d, mthd + 0x14, 5); - OUT_RING (chan, mt->level[ps->u.tex.level].pitch); - OUT_RING (chan, ps->width); - OUT_RING (chan, ps->height); - OUT_RELOCh(chan, bo, ((struct nv50_surface *)ps)->offset, flags); - OUT_RELOCl(chan, bo, ((struct nv50_surface *)ps)->offset, flags); - } else { - BEGIN_RING(chan, eng2d, mthd, 5); - OUT_RING (chan, format); - OUT_RING (chan, 0); - OUT_RING (chan, mt->level[ps->u.tex.level].tile_mode << 4); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, eng2d, mthd + 0x18, 4); - OUT_RING (chan, ps->width); - OUT_RING (chan, ps->height); - OUT_RELOCh(chan, bo, ((struct nv50_surface *)ps)->offset, flags); - OUT_RELOCl(chan, bo, ((struct nv50_surface *)ps)->offset, flags); - } - + struct nouveau_bo *bo = mt->base.bo; + uint32_t width, height, depth; + uint32_t format; + uint32_t mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; + uint32_t flags = mt->base.domain | (dst ? 
NOUVEAU_BO_WR : NOUVEAU_BO_RD); + uint32_t offset = mt->level[level].offset; + + format = nv50_2d_format(mt->base.base.format); + if (!format) { + NOUVEAU_ERR("invalid/unsupported surface format: %s\n", + util_format_name(mt->base.base.format)); + return 1; + } + + width = u_minify(mt->base.base.width0, level); + height = u_minify(mt->base.base.height0, level); + + offset = mt->level[level].offset; + if (!mt->layout_3d) { + offset += mt->layer_stride * layer; + depth = 1; + layer = 0; + } else { + depth = u_minify(mt->base.base.depth0, level); + } + + if (!(bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)) { + BEGIN_RING(chan, RING_2D_(mthd), 2); + OUT_RING (chan, format); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5); + OUT_RING (chan, mt->level[level].pitch); + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RELOCh(chan, bo, offset, flags); + OUT_RELOCl(chan, bo, offset, flags); + } else { + BEGIN_RING(chan, RING_2D_(mthd), 5); + OUT_RING (chan, format); + OUT_RING (chan, 0); + OUT_RING (chan, mt->level[level].tile_mode << 4); + OUT_RING (chan, depth); + OUT_RING (chan, layer); + BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4); + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RELOCh(chan, bo, offset, flags); + OUT_RELOCl(chan, bo, offset, flags); + } + #if 0 - if (dst) { - BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, surf->width); - OUT_RING (chan, surf->height); - } + if (dst) { + BEGIN_RING(chan, RING_2D_(NV50_2D_CLIP_X), 4); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, width); + OUT_RING (chan, height); + } #endif - - return 0; + return 0; } -int -nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, - int dx, int dy, struct pipe_surface *src, int sx, int sy, - int w, int h) +static int +nv50_2d_texture_do_copy(struct nouveau_channel *chan, + struct nv50_miptree *dst, unsigned dst_level, + unsigned dx, unsigned dy, unsigned dz, + 
struct nv50_miptree *src, unsigned src_level, + unsigned sx, unsigned sy, unsigned sz, + unsigned w, unsigned h) { - struct nouveau_channel *chan = screen->eng2d->channel; - struct nouveau_grobj *eng2d = screen->eng2d; - int ret; - - ret = MARK_RING(chan, 2*16 + 32, 4); - if (ret) - return ret; - - ret = nv50_surface_set(screen, dst, 1); - if (ret) - return ret; - - ret = nv50_surface_set(screen, src, 0); - if (ret) - return ret; - - BEGIN_RING(chan, eng2d, 0x088c, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 4); - OUT_RING (chan, dx); - OUT_RING (chan, dy); - OUT_RING (chan, w); - OUT_RING (chan, h); - BEGIN_RING(chan, eng2d, 0x08c0, 4); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng2d, 0x08d0, 4); - OUT_RING (chan, 0); - OUT_RING (chan, sx); - OUT_RING (chan, 0); - OUT_RING (chan, sy); - - return 0; + int ret; + + ret = MARK_RING(chan, 2 * 16 + 32, 4); + if (ret) + return ret; + + ret = nv50_2d_texture_set(chan, 1, dst, dst_level, dz); + if (ret) + return ret; + + ret = nv50_2d_texture_set(chan, 0, src, src_level, sz); + if (ret) + return ret; + + /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */ + BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4); + OUT_RING (chan, dx); + OUT_RING (chan, dy); + OUT_RING (chan, w); + OUT_RING (chan, h); + BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4); + OUT_RING (chan, 0); + OUT_RING (chan, sx); + OUT_RING (chan, 0); + OUT_RING (chan, sy); + + return 0; } static void -nv50_surface_copy(struct pipe_context *pipe, - struct pipe_resource *dest, unsigned dst_level, - unsigned destx, unsigned desty, unsigned destz, - struct pipe_resource *src, unsigned src_level, - const struct pipe_box *src_box) +nv50_resource_copy_region(struct pipe_context *pipe, + 
struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nv50_screen *screen = nv50->screen; - struct pipe_surface *ps_dst, *ps_src, surf_tmpl; - - - assert((src->format == dest->format) || - (nv50_2d_format_faithful(src->format) && - nv50_2d_format_faithful(dest->format))); - assert(src_box->depth == 1); - - memset(&surf_tmpl, 0, sizeof(surf_tmpl)); - surf_tmpl.format = src->format; - surf_tmpl.usage = 0; /* no bind flag - not a surface */ - surf_tmpl.u.tex.level = src_level; - surf_tmpl.u.tex.first_layer = src_box->z; - surf_tmpl.u.tex.last_layer = src_box->z; - /* XXX really need surfaces here? */ - ps_src = nv50_miptree_surface_new(pipe, src, &surf_tmpl); - surf_tmpl.format = dest->format; - surf_tmpl.usage = 0; /* no bind flag - not a surface */ - surf_tmpl.u.tex.level = dst_level; - surf_tmpl.u.tex.first_layer = destz; - surf_tmpl.u.tex.last_layer = destz; - ps_dst = nv50_miptree_surface_new(pipe, dest, &surf_tmpl); - - nv50_surface_do_copy(screen, ps_dst, destx, desty, ps_src, src_box->x, - src_box->y, src_box->width, src_box->height); - - nv50_miptree_surface_del(pipe, ps_src); - nv50_miptree_surface_del(pipe, ps_dst); + struct nv50_screen *screen = nv50_context(pipe)->screen; + int ret; + unsigned dst_layer = dstz, src_layer = src_box->z; + + assert((src->format == dst->format) || + (nv50_2d_format_faithful(src->format) && + nv50_2d_format_faithful(dst->format))); + + for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { + ret = nv50_2d_texture_do_copy(screen->base.channel, + nv50_miptree(dst), dst_level, + dstx, dsty, dst_layer, + nv50_miptree(src), src_level, + src_box->x, src_box->y, src_layer, + src_box->width, src_box->height); + if (ret) + return; + } } static void nv50_clear_render_target(struct pipe_context *pipe, - struct pipe_surface *dst, - const float 
*rgba, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nv50_screen *screen = nv50->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *tesla = screen->tesla; - struct nv50_miptree *mt = nv50_miptree(dst->texture); - struct nouveau_bo *bo = mt->base.bo; - - BEGIN_RING(chan, tesla, NV50TCL_CLEAR_COLOR(0), 4); - OUT_RINGf (chan, rgba[0]); - OUT_RINGf (chan, rgba[1]); - OUT_RINGf (chan, rgba[2]); - OUT_RINGf (chan, rgba[3]); - - if (MARK_RING(chan, 18, 2)) - return; - - BEGIN_RING(chan, tesla, NV50TCL_RT_CONTROL, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, tesla, NV50TCL_RT_ADDRESS_HIGH(0), 5); - OUT_RELOCh(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RING (chan, nv50_format_table[dst->format].rt); - OUT_RING (chan, mt->level[dst->u.tex.level].tile_mode << 4); - OUT_RING (chan, 0); - BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2); - OUT_RING (chan, dst->width); - OUT_RING (chan, dst->height); - BEGIN_RING(chan, tesla, NV50TCL_RT_ARRAY_MODE, 1); - OUT_RING (chan, 1); - - /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ - - BEGIN_RING(chan, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); - OUT_RING (chan, (width << 16) | dstx); - OUT_RING (chan, (height << 16) | dsty); - - BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1); - OUT_RING (chan, 0x3c); - - nv50->dirty |= NV50_NEW_FRAMEBUFFER; + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_screen *screen = nv50->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); + struct nouveau_bo *bo = mt->base.bo; + + BEGIN_RING(chan, 
RING_3D(CLEAR_COLOR(0)), 4); + OUT_RINGf (chan, rgba[0]); + OUT_RINGf (chan, rgba[1]); + OUT_RINGf (chan, rgba[2]); + OUT_RINGf (chan, rgba[3]); + + if (MARK_RING(chan, 18, 2)) + return; + + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 5); + OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, nv50_format_table[dst->format].rt); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode << 4); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(RT_HORIZ(0)), 2); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + BEGIN_RING(chan, RING_3D(RT_ARRAY_MODE), 1); + OUT_RING (chan, 1); + + /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ + + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, (width << 16) | dstx); + OUT_RING (chan, (height << 16) | dsty); + + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, 0x3c); + + nv50->dirty |= NV50_NEW_FRAMEBUFFER; } static void nv50_clear_depth_stencil(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_screen *screen = nv50->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); + struct nouveau_bo *bo = mt->base.bo; + uint32_t mode = 0; + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1); + OUT_RINGf (chan, depth); + mode |= NV50_3D_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_RING(chan, 
RING_3D(CLEAR_STENCIL), 1); + OUT_RING (chan, stencil & 0xff); + mode |= NV50_3D_CLEAR_BUFFERS_S; + } + + if (MARK_RING(chan, 17, 2)) + return; + + BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); + OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, nv50_format_table[dst->format].rt); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode << 4); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, (1 << 16) | 1); + + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, (width << 16) | dstx); + OUT_RING (chan, (height << 16) | dsty); + + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, mode); + + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +void +nv50_clear(struct pipe_context *pipe, unsigned buffers, + const float *rgba, double depth, unsigned stencil) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nv50_screen *screen = nv50->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *tesla = screen->tesla; - struct nv50_miptree *mt = nv50_miptree(dst->texture); - struct nouveau_bo *bo = mt->base.bo; - uint32_t mode = 0; - - if (clear_flags & PIPE_CLEAR_DEPTH) { - BEGIN_RING(chan, tesla, NV50TCL_CLEAR_DEPTH, 1); - OUT_RINGf (chan, depth); - mode |= NV50TCL_CLEAR_BUFFERS_Z; - } - - if (clear_flags & PIPE_CLEAR_STENCIL) { - BEGIN_RING(chan, tesla, NV50TCL_CLEAR_STENCIL, 1); - OUT_RING (chan, stencil & 0xff); - mode |= NV50TCL_CLEAR_BUFFERS_S; - } - - if (MARK_RING(chan, 17, 2)) - return; - - BEGIN_RING(chan, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); - OUT_RELOCh(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - 
OUT_RING (chan, nv50_format_table[dst->format].rt); - OUT_RING (chan, mt->level[dst->u.tex.level].tile_mode << 4); - OUT_RING (chan, 0); - BEGIN_RING(chan, tesla, NV50TCL_ZETA_ENABLE, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, tesla, NV50TCL_ZETA_HORIZ, 3); - OUT_RING (chan, dst->width); - OUT_RING (chan, dst->height); - OUT_RING (chan, (1 << 16) | 1); - - BEGIN_RING(chan, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); - OUT_RING (chan, (width << 16) | dstx); - OUT_RING (chan, (height << 16) | dsty); - - BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1); - OUT_RING (chan, mode); - - nv50->dirty |= NV50_NEW_FRAMEBUFFER; + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->base.channel; + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + unsigned i; + const unsigned dirty = nv50->dirty; + uint32_t mode = 0; + + /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ + nv50->dirty &= NV50_NEW_FRAMEBUFFER; + if (!nv50_state_validate(nv50)) + return; + + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { + BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4); + OUT_RINGf (chan, rgba[0]); + OUT_RINGf (chan, rgba[1]); + OUT_RINGf (chan, rgba[2]); + OUT_RINGf (chan, rgba[3]); + mode = + NV50_3D_CLEAR_BUFFERS_R | NV50_3D_CLEAR_BUFFERS_G | + NV50_3D_CLEAR_BUFFERS_B | NV50_3D_CLEAR_BUFFERS_A; + } + + if (buffers & PIPE_CLEAR_DEPTH) { + BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1); + OUT_RING (chan, fui(depth)); + mode |= NV50_3D_CLEAR_BUFFERS_Z; + } + + if (buffers & PIPE_CLEAR_STENCIL) { + BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1); + OUT_RING (chan, stencil & 0xff); + mode |= NV50_3D_CLEAR_BUFFERS_S; + } + + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, mode); + + for (i = 1; i < fb->nr_cbufs; i++) { + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, (i << 6) | 0x3c); + } + + nv50->dirty = dirty & ~NV50_NEW_FRAMEBUFFER; } void nv50_init_surface_functions(struct nv50_context *nv50) { - 
nv50->pipe.resource_copy_region = nv50_surface_copy; - nv50->pipe.clear_render_target = nv50_clear_render_target; - nv50->pipe.clear_depth_stencil = nv50_clear_depth_stencil; + struct pipe_context *pipe = &nv50->base.pipe; + + pipe->resource_copy_region = nv50_resource_copy_region; + pipe->clear_render_target = nv50_clear_render_target; + pipe->clear_depth_stencil = nv50_clear_depth_stencil; } diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 9243f9edce..9192d2e259 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -21,217 +21,298 @@ */ #include "nv50_context.h" -#include "nv50_texture.h" #include "nv50_resource.h" - -#include "nouveau/nouveau_stateobj.h" -#include "nouveau/nouveau_reloc.h" +#include "nv50_texture.xml.h" +#include "nv50_defs.xml.h" #include "util/u_format.h" +#define NV50_TIC_0_SWIZZLE__MASK \ + (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ + NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) + static INLINE uint32_t -nv50_tic_swizzle(uint32_t tc, unsigned swz) +nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int) { - switch (swz) { - case PIPE_SWIZZLE_RED: - return (tc & NV50TIC_0_0_MAPR_MASK) >> NV50TIC_0_0_MAPR_SHIFT; - case PIPE_SWIZZLE_GREEN: - return (tc & NV50TIC_0_0_MAPG_MASK) >> NV50TIC_0_0_MAPG_SHIFT; - case PIPE_SWIZZLE_BLUE: - return (tc & NV50TIC_0_0_MAPB_MASK) >> NV50TIC_0_0_MAPB_SHIFT; - case PIPE_SWIZZLE_ALPHA: - return (tc & NV50TIC_0_0_MAPA_MASK) >> NV50TIC_0_0_MAPA_SHIFT; - case PIPE_SWIZZLE_ONE: - return 7; - case PIPE_SWIZZLE_ZERO: - default: - return 0; - } + switch (swz) { + case PIPE_SWIZZLE_RED: + return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; + case PIPE_SWIZZLE_GREEN: + return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; + case PIPE_SWIZZLE_BLUE: + return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; + case PIPE_SWIZZLE_ALPHA: + return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + 
case PIPE_SWIZZLE_ONE: + return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT; + case PIPE_SWIZZLE_ZERO: + default: + return NV50_TIC_MAP_ZERO; + } } -boolean -nv50_tex_construct(struct nv50_sampler_view *view) +struct pipe_sampler_view * +nv50_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ) { - const struct util_format_description *desc; - struct nv50_miptree *mt = nv50_miptree(view->pipe.texture); - uint32_t swz[4], *tic = view->tic; - - tic[0] = nv50_format_table[view->pipe.format].tic; - - swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r); - swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g); - swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b); - swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a); - view->tic[0] = (tic[0] & ~NV50TIC_0_0_SWIZZLE_MASK) | - (swz[0] << NV50TIC_0_0_MAPR_SHIFT) | - (swz[1] << NV50TIC_0_0_MAPG_SHIFT) | - (swz[2] << NV50TIC_0_0_MAPB_SHIFT) | - (swz[3] << NV50TIC_0_0_MAPA_SHIFT); - - tic[2] = 0x50001000; - tic[2] |= ((mt->base.bo->tile_mode & 0x0f) << 22) | - ((mt->base.bo->tile_mode & 0xf0) << 21); - - desc = util_format_description(mt->base.base.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - tic[2] |= NV50TIC_0_2_COLORSPACE_SRGB; - - switch (mt->base.base.target) { - case PIPE_TEXTURE_1D: - tic[2] |= NV50TIC_0_2_TARGET_1D; - break; - case PIPE_TEXTURE_2D: - tic[2] |= NV50TIC_0_2_TARGET_2D; - break; - case PIPE_TEXTURE_RECT: - tic[2] |= NV50TIC_0_2_TARGET_RECT; - break; - case PIPE_TEXTURE_3D: - tic[2] |= NV50TIC_0_2_TARGET_3D; - break; - case PIPE_TEXTURE_CUBE: - tic[2] |= NV50TIC_0_2_TARGET_CUBE; - break; - default: - NOUVEAU_ERR("invalid texture target: %d\n", - mt->base.base.target); - return FALSE; - } - - tic[3] = 0x00300000; - - tic[4] = (1 << 31) | mt->base.base.width0; - tic[5] = (mt->base.base.last_level << 28) | - (mt->base.base.depth0 << 16) | mt->base.base.height0; - - tic[6] = 0x03000000; - - tic[7] 
= (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; - - return TRUE; + const struct util_format_description *desc; + uint32_t *tic; + uint32_t swz[4]; + uint32_t depth; + struct nv50_tic_entry *view; + struct nv50_miptree *mt = nv50_miptree(texture); + boolean tex_int; + + view = MALLOC_STRUCT(nv50_tic_entry); + if (!view) + return NULL; + + view->pipe = *templ; + view->pipe.reference.count = 1; + view->pipe.texture = NULL; + view->pipe.context = pipe; + + view->id = -1; + + pipe_resource_reference(&view->pipe.texture, texture); + + tic = &view->tic[0]; + + desc = util_format_description(view->pipe.format); + + /* TIC[0] */ + + tic[0] = nv50_format_table[view->pipe.format].tic; + + tex_int = FALSE; /* XXX: integer textures */ + + swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); + swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); + tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | + (swz[0] << NV50_TIC_0_MAPR__SHIFT) | + (swz[1] << NV50_TIC_0_MAPG__SHIFT) | + (swz[2] << NV50_TIC_0_MAPB__SHIFT) | + (swz[3] << NV50_TIC_0_MAPA__SHIFT); + + tic[1] = /* mt->base.bo->offset; */ 0; + tic[2] = /* mt->base.bo->offset >> 32 */ 0; + + tic[2] |= 0x10001000 | NV50_TIC_2_NO_BORDER; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + + if (mt->base.base.target != PIPE_TEXTURE_RECT) + tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + + tic[2] |= + ((mt->base.bo->tile_mode & 0x0f) << (22 - 0)) | + ((mt->base.bo->tile_mode & 0xf0) << (25 - 4)); + + depth = MAX2(mt->base.base.array_size, mt->base.base.depth0); + + if (mt->base.base.target == PIPE_TEXTURE_1D_ARRAY || + mt->base.base.target == PIPE_TEXTURE_2D_ARRAY) { + tic[1] = view->pipe.u.tex.first_layer * mt->layer_stride; + depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1; + } + + switch 
(mt->base.base.target) { + case PIPE_TEXTURE_1D: + tic[2] |= NV50_TIC_2_TARGET_1D; + break; + case PIPE_TEXTURE_2D: + tic[2] |= NV50_TIC_2_TARGET_2D; + break; + case PIPE_TEXTURE_RECT: + tic[2] |= NV50_TIC_2_TARGET_RECT; + break; + case PIPE_TEXTURE_3D: + tic[2] |= NV50_TIC_2_TARGET_3D; + break; + case PIPE_TEXTURE_CUBE: + depth /= 6; + if (depth > 1) + tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; + else + tic[2] |= NV50_TIC_2_TARGET_CUBE; + break; + case PIPE_TEXTURE_1D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; + break; + case PIPE_BUFFER: + tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR; + break; + default: + NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target); + /* unsupported target: drop the texture reference and the half-built entry instead of leaking them */ + pipe_resource_reference(&view->pipe.texture, NULL); + FREE(view); + return NULL; + } + + if (mt->base.base.target == PIPE_BUFFER) + tic[3] = mt->base.base.width0; + else + tic[3] = 0x00300000; + + tic[4] = (1 << 31) | mt->base.base.width0; + + tic[5] = mt->base.base.height0 & 0xffff; + tic[5] |= depth << 16; + tic[5] |= mt->base.base.last_level << 28; + + tic[6] = 0x03000000; + + tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; + + return &view->pipe; } -static int -nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so, - unsigned p) +static boolean +nv50_validate_tic(struct nv50_context *nv50, int s) { - struct nouveau_grobj *eng2d = nv50->screen->eng2d; - struct nouveau_grobj *tesla = nv50->screen->tesla; - unsigned unit, j; - - const unsigned rll = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW; - const unsigned rlh = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH - | NOUVEAU_BO_OR; - - nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM, - p * (32 * 8 * 4), nv50->sampler_view_nr[p] * 8 * 4); - - for (unit = 0; unit < nv50->sampler_view_nr[p]; ++unit) { - struct nv50_sampler_view *view = - nv50_sampler_view(nv50->sampler_views[p][unit]); - - so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8); - if
(view) { - uint32_t tic2 = view->tic[2]; - struct nv50_miptree *mt = - nv50_miptree(view->pipe.texture); - - tic2 &= ~NV50TIC_0_2_NORMALIZED_COORDS; - if (nv50->sampler[p][unit]->normalized) - tic2 |= NV50TIC_0_2_NORMALIZED_COORDS; - view->tic[2] = tic2; - - so_data (so, view->tic[0]); - so_reloc (so, mt->base.bo, 0, rll, 0, 0); - so_reloc (so, mt->base.bo, 0, rlh, tic2, tic2); - so_datap (so, &view->tic[3], 5); - - /* Set TEX insn $t src binding $unit in program type p - * to TIC, TSC entry (32 * p + unit), mark valid (1). - */ - so_method(so, tesla, NV50TCL_BIND_TIC(p), 1); - so_data (so, ((32 * p + unit) << 9) | (unit << 1) | 1); - } else { - for (j = 0; j < 8; ++j) - so_data(so, 0); - so_method(so, tesla, NV50TCL_BIND_TIC(p), 1); - so_data (so, (unit << 1) | 0); - } - } - - for (; unit < nv50->state.sampler_view_nr[p]; unit++) { - /* Make other bindings invalid. */ - so_method(so, tesla, NV50TCL_BIND_TIC(p), 1); - so_data (so, (unit << 1) | 0); - } - - nv50->state.sampler_view_nr[p] = nv50->sampler_view_nr[p]; - return TRUE; + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_bo *txc = nv50->screen->txc; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nv50->num_textures[s]; ++i) { + struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]); + struct nv04_resource *res; + + if (!tic) { + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); + OUT_RING (chan, (i << 1) | 0); + continue; + } + res = &nv50_miptree(tic->pipe.texture)->base; + + if (tic->id < 0) { + uint32_t offset = tic->tic[1]; + + tic->id = nv50_screen_tic_alloc(nv50->screen, tic); + + MARK_RING (chan, 24 + 8, 4); + BEGIN_RING(chan, RING_2D(DST_FORMAT), 2); + OUT_RING (chan, NV50_SURFACE_FORMAT_R8_UNORM); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_2D(DST_PITCH), 5); + OUT_RING (chan, 262144); + OUT_RING (chan, 65536); + OUT_RING (chan, 1); + OUT_RELOCh(chan, txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, txc, 0, NOUVEAU_BO_VRAM | 
NOUVEAU_BO_WR); + BEGIN_RING(chan, RING_2D(SIFC_BITMAP_ENABLE), 2); + OUT_RING (chan, 0); + OUT_RING (chan, NV50_SURFACE_FORMAT_R8_UNORM); + BEGIN_RING(chan, RING_2D(SIFC_WIDTH), 10); + OUT_RING (chan, 32); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, tic->id * 32); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING_NI(chan, RING_2D(SIFC_DATA), 8); + OUT_RING (chan, tic->tic[0]); + OUT_RELOCl(chan, res->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOC (chan, res->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, tic->tic[2], tic->tic[2]); + OUT_RINGp (chan, &tic->tic[3], 5); + + need_flush = TRUE; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); + OUT_RING (chan, 0x20); //(tic->id << 4) | 1); + } + + nv50->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; /* clear only the write flag (other status bits are kept) before marking the resource as GPU-read */ + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nv50_bufctx_add_resident(nv50, NV50_BUFCTX_TEXTURES, res, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); + OUT_RING (chan, (tic->id << 9) | (i << 1) | 1); + } + for (; i < nv50->state.num_textures[s]; ++i) { + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); + OUT_RING (chan, (i << 1) | 0); + } + nv50->state.num_textures[s] = nv50->num_textures[s]; + + return need_flush; } -static void -nv50_emit_texture_relocs(struct nv50_context *nv50, int prog) +void nv50_validate_textures(struct nv50_context *nv50) { - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_bo *tic = nv50->screen->tic; - int unit; - - for (unit = 0; unit < nv50->sampler_view_nr[prog]; unit++) { - struct nv50_sampler_view *view; - struct nv50_miptree *mt; - const unsigned base = ((prog * 32) + unit) * 32; - - view = nv50_sampler_view(nv50->sampler_views[prog][unit]); - if
(!view) - continue; - mt = nv50_miptree(view->pipe.texture); - - nouveau_reloc_emit(chan, tic, base + 4, NULL, mt->base.bo, 0, 0, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); - nouveau_reloc_emit(chan, tic, base + 8, NULL, mt->base.bo, 0, 0, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, view->tic[2], view->tic[2]); - } + boolean need_flush; + + need_flush = nv50_validate_tic(nv50, 0); + need_flush |= nv50_validate_tic(nv50, 2); + + if (need_flush) { + BEGIN_RING(nv50->screen->base.channel, RING_3D(TIC_FLUSH), 1); + OUT_RING (nv50->screen->base.channel, 0); + } } -void -nv50_tex_relocs(struct nv50_context *nv50) +static boolean +nv50_validate_tsc(struct nv50_context *nv50, int s) { - nv50_emit_texture_relocs(nv50, 2); /* FP */ - nv50_emit_texture_relocs(nv50, 0); /* VP */ + struct nouveau_channel *chan = nv50->screen->base.channel; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nv50->num_samplers[s]; ++i) { + struct nv50_tsc_entry *tsc = nv50_tsc_entry(nv50->samplers[s][i]); + + if (!tsc) { + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); + OUT_RING (chan, (i << 4) | 0); + continue; + } + if (tsc->id < 0) { + tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc); + + nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc, + 65536 + tsc->id * 32, + NOUVEAU_BO_VRAM, 32, tsc->tsc); + need_flush = TRUE; + } + nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); + OUT_RING (chan, (tsc->id << 12) | (i << 4) | 1); + } + for (; i < nv50->state.num_samplers[s]; ++i) { + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); + OUT_RING (chan, (i << 4) | 0); + } + nv50->state.num_samplers[s] = nv50->num_samplers[s]; + + return need_flush; } -struct nouveau_stateobj * -nv50_tex_validate(struct nv50_context *nv50) +void nv50_validate_samplers(struct nv50_context *nv50) { - struct nouveau_stateobj *so; - struct nouveau_grobj *tesla = nv50->screen->tesla; - unsigned p, m = 0, d = 0, r = 0; - - for (p = 0; p 
< 3; ++p) { - unsigned nr = MAX2(nv50->sampler_view_nr[p], - nv50->state.sampler_view_nr[p]); - m += nr; - d += nr; - r += nv50->sampler_view_nr[p]; - } - m = m * 2 + 3 * 4 + 1; - d = d * 9 + 3 * 19 + 1; - r = r * 2 + 3 * 2; - - so = so_new(m, d, r); - - if (nv50_validate_textures(nv50, so, 0) == FALSE || - nv50_validate_textures(nv50, so, 2) == FALSE) { - so_ref(NULL, &so); - - NOUVEAU_ERR("failed tex validate\n"); - return NULL; - } - - so_method(so, tesla, 0x1330, 1); /* flush TIC */ - so_data (so, 0); - - return so; + boolean need_flush; + + need_flush = nv50_validate_tsc(nv50, 0); + need_flush |= nv50_validate_tsc(nv50, 2); + + if (need_flush) { + BEGIN_RING(nv50->screen->base.channel, RING_3D(TSC_FLUSH), 1); + OUT_RING (nv50->screen->base.channel, 0); + } } diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h deleted file mode 100644 index b4939943e8..0000000000 --- a/src/gallium/drivers/nv50/nv50_texture.h +++ /dev/null @@ -1,197 +0,0 @@ -#ifndef __NV50_TEXTURE_H__ -#define __NV50_TEXTURE_H__ - -/* It'd be really nice to have these in nouveau_class.h generated by - * renouveau like the rest of the object header - but not sure it can - * handle non-object stuff nicely - need to look into it. 
- */ - -/* Texture image control block */ -#define NV50TIC_0_0_SWIZZLE_MASK 0x3ffc0000 -#define NV50TIC_0_0_MAPA_MASK 0x38000000 -#define NV50TIC_0_0_MAPA_SHIFT 27 -#define NV50TIC_0_0_MAPA_ZERO 0x00000000 -#define NV50TIC_0_0_MAPA_C0 0x10000000 -#define NV50TIC_0_0_MAPA_C1 0x18000000 -#define NV50TIC_0_0_MAPA_C2 0x20000000 -#define NV50TIC_0_0_MAPA_C3 0x28000000 -#define NV50TIC_0_0_MAPA_ONE 0x38000000 -#define NV50TIC_0_0_MAPB_MASK 0x07000000 -#define NV50TIC_0_0_MAPB_SHIFT 24 -#define NV50TIC_0_0_MAPB_ZERO 0x00000000 -#define NV50TIC_0_0_MAPB_C0 0x02000000 -#define NV50TIC_0_0_MAPB_C1 0x03000000 -#define NV50TIC_0_0_MAPB_C2 0x04000000 -#define NV50TIC_0_0_MAPB_C3 0x05000000 -#define NV50TIC_0_0_MAPB_ONE 0x07000000 -#define NV50TIC_0_0_MAPG_MASK 0x00e00000 -#define NV50TIC_0_0_MAPG_SHIFT 21 -#define NV50TIC_0_0_MAPG_ZERO 0x00000000 -#define NV50TIC_0_0_MAPG_C0 0x00400000 -#define NV50TIC_0_0_MAPG_C1 0x00600000 -#define NV50TIC_0_0_MAPG_C2 0x00800000 -#define NV50TIC_0_0_MAPG_C3 0x00a00000 -#define NV50TIC_0_0_MAPG_ONE 0x00e00000 -#define NV50TIC_0_0_MAPR_MASK 0x001c0000 -#define NV50TIC_0_0_MAPR_SHIFT 18 -#define NV50TIC_0_0_MAPR_ZERO 0x00000000 -#define NV50TIC_0_0_MAPR_C0 0x00080000 -#define NV50TIC_0_0_MAPR_C1 0x000c0000 -#define NV50TIC_0_0_MAPR_C2 0x00100000 -#define NV50TIC_0_0_MAPR_C3 0x00140000 -#define NV50TIC_0_0_MAPR_ONE 0x001c0000 -#define NV50TIC_0_0_TYPEA_MASK 0x00038000 -#define NV50TIC_0_0_TYPEA_UNORM 0x00010000 -#define NV50TIC_0_0_TYPEA_SNORM 0x00008000 -#define NV50TIC_0_0_TYPEA_SINT 0x00018000 -#define NV50TIC_0_0_TYPEA_UINT 0x00020000 -#define NV50TIC_0_0_TYPEA_SSCALED 0x00028000 -#define NV50TIC_0_0_TYPEA_USCALED 0x00030000 -#define NV50TIC_0_0_TYPEA_FLOAT 0x00038000 -#define NV50TIC_0_0_TYPEB_MASK 0x00007000 -#define NV50TIC_0_0_TYPEB_UNORM 0x00002000 -#define NV50TIC_0_0_TYPEB_SNORM 0x00001000 -#define NV50TIC_0_0_TYPEB_SINT 0x00003000 -#define NV50TIC_0_0_TYPEB_UINT 0x00004000 -#define NV50TIC_0_0_TYPEB_SSCALED 0x00005000 -#define 
NV50TIC_0_0_TYPEB_USCALED 0x00006000 -#define NV50TIC_0_0_TYPEB_FLOAT 0x00007000 -#define NV50TIC_0_0_TYPEG_MASK 0x00000e00 -#define NV50TIC_0_0_TYPEG_UNORM 0x00000400 -#define NV50TIC_0_0_TYPEG_SNORM 0x00000200 -#define NV50TIC_0_0_TYPEG_SINT 0x00000600 -#define NV50TIC_0_0_TYPEG_UINT 0x00000800 -#define NV50TIC_0_0_TYPEG_SSCALED 0x00000a00 -#define NV50TIC_0_0_TYPEG_USCALED 0x00000c00 -#define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00 -#define NV50TIC_0_0_TYPER_MASK 0x000001c0 -#define NV50TIC_0_0_TYPER_UNORM 0x00000080 -#define NV50TIC_0_0_TYPER_SNORM 0x00000040 -#define NV50TIC_0_0_TYPER_SINT 0x000000c0 -#define NV50TIC_0_0_TYPER_UINT 0x00000100 -#define NV50TIC_0_0_TYPER_SSCALED 0x00000140 -#define NV50TIC_0_0_TYPER_USCALED 0x00000180 -#define NV50TIC_0_0_TYPER_FLOAT 0x000001c0 -#define NV50TIC_0_0_FMT_MASK 0x0000003f -#define NV50TIC_0_0_FMT_32_32_32_32 0x00000001 -#define NV50TIC_0_0_FMT_16_16_16_16 0x00000003 -#define NV50TIC_0_0_FMT_32_32 0x00000004 -#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 -#define NV50TIC_0_0_FMT_2_10_10_10 0x00000009 -#define NV50TIC_0_0_FMT_16_16 0x0000000c -#define NV50TIC_0_0_FMT_32 0x0000000f -#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012 -/* #define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 */ -#define NV50TIC_0_0_FMT_1_5_5_5 0x00000014 -#define NV50TIC_0_0_FMT_5_6_5 0x00000015 -#define NV50TIC_0_0_FMT_8_8 0x00000018 -#define NV50TIC_0_0_FMT_16 0x0000001b -#define NV50TIC_0_0_FMT_8 0x0000001d -#define NV50TIC_0_0_FMT_5_9_9_9 0x00000020 -#define NV50TIC_0_0_FMT_10_11_11 0x00000021 -#define NV50TIC_0_0_FMT_DXT1 0x00000024 -#define NV50TIC_0_0_FMT_DXT3 0x00000025 -#define NV50TIC_0_0_FMT_DXT5 0x00000026 -#define NV50TIC_0_0_FMT_RGTC1 0x00000027 -#define NV50TIC_0_0_FMT_RGTC2 0x00000028 -#define NV50TIC_0_0_FMT_24_8 0x00000029 -#define NV50TIC_0_0_FMT_8_24 0x0000002a -#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f -#define NV50TIC_0_0_FMT_32_8 0x00000030 -#define NV50TIC_0_0_FMT_16_DEPTH 0x0000003a - -#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff 
-#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0 - -#define NV50TIC_0_2_COLORSPACE_SRGB 0x00000400 -#define NV50TIC_0_2_TARGET_1D 0x00000000 -#define NV50TIC_0_2_TARGET_2D 0x00004000 -#define NV50TIC_0_2_TARGET_3D 0x00008000 -#define NV50TIC_0_2_TARGET_CUBE 0x0000c000 -#define NV50TIC_0_2_TARGET_1D_ARRAY 0x00010000 -#define NV50TIC_0_2_TARGET_2D_ARRAY 0x00014000 -#define NV50TIC_0_2_TARGET_BUFFER 0x00018000 -#define NV50TIC_0_2_TARGET_RECT 0x0001c000 -/* #define NV50TIC_0_0_TILE_MODE_LINEAR 0x00040000 */ -#define NV50TIC_0_2_TILE_MODE_Y_MASK 0x01c00000 -#define NV50TIC_0_2_TILE_MODE_Y_SHIFT 22 -#define NV50TIC_0_2_TILE_MODE_Z_MASK 0x0e000000 -#define NV50TIC_0_2_TILE_MODE_Z_SHIFT 25 -#define NV50TIC_0_2_NORMALIZED_COORDS 0x80000000 - -#define NV50TIC_0_3_UNKNOWN_MASK 0xffffffff - -#define NV50TIC_0_4_WIDTH_MASK 0x0000ffff -#define NV50TIC_0_4_WIDTH_SHIFT 0 - -#define NV50TIC_0_5_LAST_LEVEL_MASK 0xf0000000 -#define NV50TIC_0_5_LAST_LEVEL_SHIFT 28 -#define NV50TIC_0_5_DEPTH_MASK 0x0fff0000 -#define NV50TIC_0_5_DEPTH_SHIFT 16 -#define NV50TIC_0_5_HEIGHT_MASK 0x0000ffff -#define NV50TIC_0_5_HEIGHT_SHIFT 0 -#define NV50TIC_0_6_UNKNOWN_MASK 0xffffffff - -#define NV50TIC_0_7_BASE_LEVEL_MASK 0x0000000f -#define NV50TIC_0_7_BASE_LEVEL_SHIFT 0 -#define NV50TIC_0_7_MAX_LEVEL_MASK 0x000000f0 -#define NV50TIC_0_7_MAX_LEVEL_SHIFT 4 - -/* Texture sampler control block */ -#define NV50TSC_1_0_WRAPS_MASK 0x00000007 -#define NV50TSC_1_0_WRAPS_REPEAT 0x00000000 -#define NV50TSC_1_0_WRAPS_MIRROR_REPEAT 0x00000001 -#define NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE 0x00000002 -#define NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER 0x00000003 -#define NV50TSC_1_0_WRAPS_CLAMP 0x00000004 -#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE 0x00000005 -#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER 0x00000006 -#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP 0x00000007 -#define NV50TSC_1_0_WRAPT_MASK 0x00000038 -#define NV50TSC_1_0_WRAPT_REPEAT 0x00000000 -#define NV50TSC_1_0_WRAPT_MIRROR_REPEAT 0x00000008 -#define 
NV50TSC_1_0_WRAPT_CLAMP_TO_EDGE 0x00000010 -#define NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER 0x00000018 -#define NV50TSC_1_0_WRAPT_CLAMP 0x00000020 -#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_EDGE 0x00000028 -#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_BORDER 0x00000030 -#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP 0x00000038 -#define NV50TSC_1_0_WRAPR_MASK 0x000001c0 -#define NV50TSC_1_0_WRAPR_REPEAT 0x00000000 -#define NV50TSC_1_0_WRAPR_MIRROR_REPEAT 0x00000040 -#define NV50TSC_1_0_WRAPR_CLAMP_TO_EDGE 0x00000080 -#define NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER 0x000000c0 -#define NV50TSC_1_0_WRAPR_CLAMP 0x00000100 -#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140 -#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180 -#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0 -#define NV50TSC_1_0_MAX_ANISOTROPY_MASK 0x00700000 - -#define NV50TSC_1_1_MAGF_MASK 0x00000003 -#define NV50TSC_1_1_MAGF_NEAREST 0x00000001 -#define NV50TSC_1_1_MAGF_LINEAR 0x00000002 -#define NV50TSC_1_1_MINF_MASK 0x00000030 -#define NV50TSC_1_1_MINF_NEAREST 0x00000010 -#define NV50TSC_1_1_MINF_LINEAR 0x00000020 -#define NV50TSC_1_1_MIPF_MASK 0x000000c0 -#define NV50TSC_1_1_MIPF_NONE 0x00000040 -#define NV50TSC_1_1_MIPF_NEAREST 0x00000080 -#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0 -#define NV50TSC_1_1_LOD_BIAS_MASK 0x01fff000 -#define NV50TSC_1_1_UNKN_ANISO_15 0x10000000 -#define NV50TSC_1_1_UNKN_ANISO_35 0x18000000 - -#define NV50TSC_1_2_MIN_LOD_MASK 0x00000f00 -#define NV50TSC_1_2_MAX_LOD_MASK 0x00f00000 - -#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff - -#define NV50TSC_1_4_BORDER_COLOR_RED_MASK 0xffffffff - -#define NV50TSC_1_5_BORDER_COLOR_GREEN_MASK 0xffffffff - -#define NV50TSC_1_6_BORDER_COLOR_BLUE_MASK 0xffffffff - -#define NV50TSC_1_7_BORDER_COLOR_ALPHA_MASK 0xffffffff - -#endif diff --git a/src/gallium/drivers/nv50/nv50_texture.xml.h b/src/gallium/drivers/nv50/nv50_texture.xml.h new file mode 100644 index 0000000000..e0cbbdf0d7 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_texture.xml.h @@ 
-0,0 +1,279 @@ +#ifndef NV50_TEXTURE_XML +#define NV50_TEXTURE_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_texture.xml ( 8377 bytes, from 2011-02-12 12:05:21) +- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20) + +Copyright (C) 2006-2011 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- 
sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define NV50_TIC_MAP_ZERO 0x00000000 +#define NV50_TIC_MAP_C0 0x00000002 +#define NV50_TIC_MAP_C1 0x00000003 +#define NV50_TIC_MAP_C2 0x00000004 +#define NV50_TIC_MAP_C3 0x00000005 +#define NV50_TIC_MAP_ONE_INT 0x00000006 +#define NV50_TIC_MAP_ONE_FLOAT 0x00000007 +#define NV50_TIC_TYPE_SNORM 0x00000001 +#define NV50_TIC_TYPE_UNORM 0x00000002 +#define NV50_TIC_TYPE_SINT 0x00000003 +#define NV50_TIC_TYPE_UINT 0x00000004 +#define NV50_TIC_TYPE_SSCALED 0x00000005 +#define NV50_TIC_TYPE_USCALED 0x00000006 +#define NV50_TIC_TYPE_FLOAT 0x00000007 +#define NV50_TSC_WRAP_REPEAT 0x00000000 +#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001 +#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002 +#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003 +#define NV50_TSC_WRAP_CLAMP 0x00000004 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006 +#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007 +#define NV50_TIC__SIZE 0x00000020 +#define NV50_TIC_0 0x00000000 +#define NV50_TIC_0_MAPA__MASK 0x38000000 +#define NV50_TIC_0_MAPA__SHIFT 27 +#define NV50_TIC_0_MAPB__MASK 0x07000000 +#define NV50_TIC_0_MAPB__SHIFT 24 +#define NV50_TIC_0_MAPG__MASK 0x00e00000 +#define NV50_TIC_0_MAPG__SHIFT 21 +#define NV50_TIC_0_MAPR__MASK 0x001c0000 +#define NV50_TIC_0_MAPR__SHIFT 18 +#define NV50_TIC_0_TYPE3__MASK 0x00038000 +#define NV50_TIC_0_TYPE3__SHIFT 15 +#define NV50_TIC_0_TYPE2__MASK 0x00007000 +#define NV50_TIC_0_TYPE2__SHIFT 12 +#define NV50_TIC_0_TYPE1__MASK 0x00000e00 +#define NV50_TIC_0_TYPE1__SHIFT 9 +#define NV50_TIC_0_TYPE0__MASK 0x000001c0 +#define NV50_TIC_0_TYPE0__SHIFT 6 +#define NV50_TIC_0_FMT__MASK 0x0000003f +#define NV50_TIC_0_FMT__SHIFT 0 +#define NV50_TIC_0_FMT_32_32_32_32 0x00000001 +#define NV50_TIC_0_FMT_16_16_16_16 0x00000003 +#define NV50_TIC_0_FMT_32_32 0x00000004 +#define NV50_TIC_0_FMT_32_8 0x00000005 +#define NV50_TIC_0_FMT_8_8_8_8 0x00000008 +#define NV50_TIC_0_FMT_2_10_10_10 0x00000009 +#define NV50_TIC_0_FMT_16_16 
0x0000000c +#define NV50_TIC_0_FMT_8_24 0x0000000d +#define NV50_TIC_0_FMT_24_8 0x0000000e +#define NV50_TIC_0_FMT_32 0x0000000f +#define NV50_TIC_0_FMT_BPTC_FLOAT 0x00000010 +#define NV50_TIC_0_FMT_BPTC_UFLOAT 0x00000011 +#define NV50_TIC_0_FMT_4_4_4_4 0x00000012 +#define NV50_TIC_0_FMT_5_5_5_1 0x00000013 +#define NV50_TIC_0_FMT_1_5_5_5 0x00000014 +#define NV50_TIC_0_FMT_5_6_5 0x00000015 +#define NV50_TIC_0_FMT_6_5_5 0x00000016 +#define NV50_TIC_0_FMT_BPTC 0x00000017 +#define NV50_TIC_0_FMT_8_8 0x00000018 +#define NV50_TIC_0_FMT_16 0x0000001b +#define NV50_TIC_0_FMT_8 0x0000001d +#define NV50_TIC_0_FMT_4_4 0x0000001e +#define NV50_TIC_0_FMT_BITMAP_8X8 0x0000001f +#define NV50_TIC_0_FMT_E5_9_9_9 0x00000020 +#define NV50_TIC_0_FMT_10_11_11 0x00000021 +#define NV50_TIC_0_FMT_C1_C2_C1_C0 0x00000022 +#define NV50_TIC_0_FMT_C2_C1_C0_C1 0x00000023 +#define NV50_TIC_0_FMT_DXT1 0x00000024 +#define NV50_TIC_0_FMT_DXT3 0x00000025 +#define NV50_TIC_0_FMT_DXT5 0x00000026 +#define NV50_TIC_0_FMT_RGTC1 0x00000027 +#define NV50_TIC_0_FMT_RGTC2 0x00000028 +#define NV50_TIC_0_FMT_Z24S8 0x00000029 +#define NV50_TIC_0_FMT_S8Z24 0x0000002a +#define NV50_TIC_0_FMT_X8Z24 0x0000002b +#define NV50_TIC_0_FMT_C8Z24_MS4_CS4 0x0000002c +#define NV50_TIC_0_FMT_C8Z24_MS8_CS8 0x0000002d +#define NV50_TIC_0_FMT_C8Z24_MS4_CS12 0x0000002e +#define NV50_TIC_0_FMT_Z32 0x0000002f +#define NV50_TIC_0_FMT_X24S8Z32 0x00000030 +#define NV50_TIC_0_FMT_X16C8S8X8Z24_MS4_CS4 0x00000031 +#define NV50_TIC_0_FMT_X16C8S8X8Z24_MS8_CS8 0x00000032 +#define NV50_TIC_0_FMT_X16C8X8Z32_MS4_CS4 0x00000033 +#define NV50_TIC_0_FMT_X16C8X8Z32_MS8_CS8 0x00000034 +#define NV50_TIC_0_FMT_X16C8S8Z32_MS4_CS4 0x00000035 +#define NV50_TIC_0_FMT_X16C8S8Z32_MS8_CS8 0x00000036 +#define NV50_TIC_0_FMT_X16C8S8X8Z24_MS4_CS12 0x00000037 +#define NV50_TIC_0_FMT_X16C8X8Z32_MS4_CS12 0x00000038 +#define NV50_TIC_0_FMT_X16C8S8Z32_MS4_CS12 0x00000039 +#define NV50_TIC_0_FMT_Z16 0x0000003a + +#define NV50_TIC_1 0x00000004 +#define 
NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff +#define NV50_TIC_1_OFFSET_LOW__SHIFT 0 + +#define NV50_TIC_2 0x00000008 +#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff +#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0 +#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400 +#define NV50_TIC_2_TARGET__MASK 0x0003c000 +#define NV50_TIC_2_TARGET__SHIFT 14 +#define NV50_TIC_2_TARGET_1D 0x00000000 +#define NV50_TIC_2_TARGET_2D 0x00004000 +#define NV50_TIC_2_TARGET_3D 0x00008000 +#define NV50_TIC_2_TARGET_CUBE 0x0000c000 +#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000 +#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000 +#define NV50_TIC_2_TARGET_BUFFER 0x00018000 +#define NV50_TIC_2_TARGET_RECT 0x0001c000 +#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000 +#define NV50_TIC_2_LINEAR 0x00040000 +#define NV50_TIC_2_TILE_MODE_X__MASK 0x00380000 +#define NV50_TIC_2_TILE_MODE_X__SHIFT 19 +#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000 +#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22 +#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000 +#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25 +#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000 +#define NV50_TIC_2_2D_UNK0258__SHIFT 28 +#define NV50_TIC_2_NO_BORDER 0x40000000 +#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000 + +#define NV50_TIC_3 0x0000000c +#define NV50_TIC_3_PITCH__MASK 0xffffffff +#define NV50_TIC_3_PITCH__SHIFT 0 + +#define NV50_TIC_4 0x00000010 +#define NV50_TIC_4_WIDTH__MASK 0xffffffff +#define NV50_TIC_4_WIDTH__SHIFT 0 + +#define NV50_TIC_5 0x00000014 +#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000 +#define NV50_TIC_5_LAST_LEVEL__SHIFT 28 +#define NV50_TIC_5_DEPTH__MASK 0x0fff0000 +#define NV50_TIC_5_DEPTH__SHIFT 16 +#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff +#define NV50_TIC_5_HEIGHT__SHIFT 0 + +#define NV50_TIC_7 0x0000001c +#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f +#define NV50_TIC_7_BASE_LEVEL__SHIFT 0 +#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0 +#define NV50_TIC_7_MAX_LEVEL__SHIFT 4 + +#define NV50_TSC__SIZE 0x00000020 +#define NV50_TSC_0 0x00000000 
+#define NV50_TSC_0_WRAPS__MASK 0x00000007 +#define NV50_TSC_0_WRAPS__SHIFT 0 +#define NV50_TSC_0_WRAPT__MASK 0x00000038 +#define NV50_TSC_0_WRAPT__SHIFT 3 +#define NV50_TSC_0_WRAPR__MASK 0x000001c0 +#define NV50_TSC_0_WRAPR__SHIFT 6 +#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10 +#define NV50_TSC_0_BOX_S__MASK 0x0001c000 +#define NV50_TSC_0_BOX_S__SHIFT 14 +#define NV50_TSC_0_BOX_T__MASK 0x000e0000 +#define NV50_TSC_0_BOX_T__SHIFT 17 +#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000 +#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20 + +#define NV50_TSC_1 0x00000004 +#define NV50_TSC_1_UNKN_ANISO_15 0x10000000 +#define NV50_TSC_1_UNKN_ANISO_35 0x18000000 +#define NV50_TSC_1_MAGF__MASK 0x00000003 +#define NV50_TSC_1_MAGF__SHIFT 0 +#define NV50_TSC_1_MAGF_NEAREST 0x00000001 +#define NV50_TSC_1_MAGF_LINEAR 0x00000002 +#define NV50_TSC_1_MINF__MASK 0x00000030 +#define NV50_TSC_1_MINF__SHIFT 4 +#define NV50_TSC_1_MINF_NEAREST 0x00000010 +#define NV50_TSC_1_MINF_LINEAR 0x00000020 +#define NV50_TSC_1_MIPF__MASK 0x000000c0 +#define NV50_TSC_1_MIPF__SHIFT 6 +#define NV50_TSC_1_MIPF_NONE 0x00000040 +#define NV50_TSC_1_MIPF_NEAREST 0x00000080 +#define NV50_TSC_1_MIPF_LINEAR 0x000000c0 +#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 +#define NV50_TSC_1_LOD_BIAS__SHIFT 12 + +#define NV50_TSC_2 0x00000008 +#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff +#define NV50_TSC_2_MIN_LOD__SHIFT 0 +#define NV50_TSC_2_MAX_LOD__MASK 0x00fff000 +#define NV50_TSC_2_MAX_LOD__SHIFT 12 + +#define NV50_TSC_4 0x00000010 +#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff +#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0 + +#define NV50_TSC_5 0x00000014 +#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff +#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0 + +#define NV50_TSC_6 0x00000018 +#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff +#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0 + 
+#define NV50_TSC_7 0x0000001c +#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff +#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0 + + +#endif /* NV50_TEXTURE_XML */ diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index d6b80c3ea7..1449cb04c6 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -476,6 +476,7 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) stk = (struct bld_value_stack *)phi->target; phi->target = NULL; + /* start with s == 1, src[0] is from outside the loop */ for (s = 1, n = 0; n < bb->num_in; ++n) { if (bb->in_kind[n] != CFG_EDGE_BACK) continue; @@ -487,8 +488,11 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) for (i = 0; i < 4; ++i) if (phi->src[i] && phi->src[i]->value == val) break; - if (i == 4) + if (i == 4) { + /* skip values we do not want to replace */ + for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s); nv_reference(bld->pc, &phi->src[s++], val); + } } bld->pc->current_block = save; @@ -1102,9 +1106,8 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, switch (src->Register.File) { case TGSI_FILE_CONSTANT: - dim_idx = src->Dimension.Index ? 
src->Dimension.Index + 2 : 1; - assert(dim_idx < 14); - assert(dim_idx == 1); /* for now */ + dim_idx = src->Dimension.Index; + assert(dim_idx < 15); res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), type); SET_TYPE(res, type); @@ -1130,7 +1133,7 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_INPUT: res = bld_saved_input(bld, idx, swz); if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP)) - return res; + break; res = new_value(bld->pc, bld->ti->input_file, type); res->reg.id = bld->ti->input_map[idx][swz]; @@ -1156,6 +1159,13 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_PREDICATE: res = bld_fetch_global(bld, &bld->pvs[idx][swz]); break; + case TGSI_FILE_SYSTEM_VALUE: + res = new_value(bld->pc, bld->ti->input_file, NV_TYPE_U32); + res->reg.id = bld->ti->sysval_map[idx]; + res = bld_insn_1(bld, NV_OP_LDA, res); + res = bld_insn_1(bld, NV_OP_CVT, res); + res->reg.type = NV_TYPE_F32; + break; default: NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File); abort(); @@ -1468,7 +1478,7 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], uint opcode = translate_opcode(insn->Instruction.Opcode); int arg, dim, c; const int tic = insn->Src[1].Register.Index; - const int tsc = 0; + const int tsc = tic; const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 
1 : 0; get_tex_dim(insn, &dim, &arg); @@ -1717,6 +1727,10 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *b = new_basic_block(bld->pc); + if (bld->pc->current_block->exit && + !bld->pc->current_block->exit->is_terminator) + bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, b, FALSE); + --bld->cond_lvl; nvbb_attach_block(bld->pc->current_block, b, bld->out_kind); nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); @@ -1923,6 +1937,7 @@ bld_instruction(struct bld_context *bld, dst0[c] = bld_insn_2(bld, NV_OP_XOR, temp, temp); dst0[c]->insn->cc = NV_CC_EQ; nv_reference(bld->pc, &dst0[c]->insn->flags_src, src1); + dst0[c] = bld_insn_2(bld, NV_OP_SELECT, dst0[c], temp); } break; case TGSI_OPCODE_SUB: diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index bf5af4ddc6..7486977459 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -1,351 +1,347 @@ -#include "pipe/p_context.h" -#include "util/u_inlines.h" #include "util/u_format.h" -#include "util/u_math.h" #include "nv50_context.h" #include "nv50_transfer.h" -#include "nv50_resource.h" + +#include "nv50_defs.xml.h" struct nv50_transfer { - struct pipe_transfer base; - struct nouveau_bo *bo; - int map_refcnt; - unsigned level_offset; - unsigned level_tiling; - int level_pitch; - int level_width; - int level_height; - int level_depth; - int level_x; - int level_y; - int level_z; - unsigned nblocksx; - unsigned nblocksy; + struct pipe_transfer base; + struct nv50_m2mf_rect rect[2]; + uint32_t nblocksx; + uint32_t nblocksy; }; static void -nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, - struct nouveau_bo *src_bo, unsigned src_offset, - int src_pitch, unsigned src_tile_mode, - int sx, int sy, int sz, int sw, int sh, int sd, - struct nouveau_bo *dst_bo, unsigned dst_offset, - int dst_pitch, unsigned dst_tile_mode, - int dx, int dy, int dz, int dw, int dh, int dd, - int cpp, int width, int 
height, - unsigned src_reloc, unsigned dst_reloc) +nv50_m2mf_transfer_rect(struct pipe_screen *pscreen, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) { - struct nv50_screen *screen = nv50_screen(pscreen); - struct nouveau_channel *chan = screen->m2mf->channel; - struct nouveau_grobj *m2mf = screen->m2mf; - - src_reloc |= NOUVEAU_BO_RD; - dst_reloc |= NOUVEAU_BO_WR; - - WAIT_RING (chan, 14); - - if (!nouveau_bo_tile_layout(src_bo)) { - BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, m2mf, - NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); - OUT_RING (chan, src_pitch); - src_offset += (sy * src_pitch) + (sx * cpp); - } else { - BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 6); - OUT_RING (chan, 0); - OUT_RING (chan, src_tile_mode << 4); - OUT_RING (chan, sw * cpp); - OUT_RING (chan, sh); - OUT_RING (chan, sd); - OUT_RING (chan, sz); /* copying only 1 zslice per call */ - } - - if (!nouveau_bo_tile_layout(dst_bo)) { - BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, m2mf, - NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); - OUT_RING (chan, dst_pitch); - dst_offset += (dy * dst_pitch) + (dx * cpp); - } else { - BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 6); - OUT_RING (chan, 0); - OUT_RING (chan, dst_tile_mode << 4); - OUT_RING (chan, dw * cpp); - OUT_RING (chan, dh); - OUT_RING (chan, dd); - OUT_RING (chan, dz); /* copying only 1 zslice per call */ - } - - while (height) { - int line_count = height > 2047 ? 
2047 : height; - - MARK_RING (chan, 15, 4); /* flush on lack of space or relocs */ - BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2); - OUT_RELOCh(chan, src_bo, src_offset, src_reloc); - OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc); - BEGIN_RING(chan, m2mf, - NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); - OUT_RELOCl(chan, src_bo, src_offset, src_reloc); - OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); - if (nouveau_bo_tile_layout(src_bo)) { - BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); - OUT_RING (chan, (sy << 16) | (sx * cpp)); - } else { - src_offset += (line_count * src_pitch); - } - if (nouveau_bo_tile_layout(dst_bo)) { - BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); - OUT_RING (chan, (dy << 16) | (dx * cpp)); - } else { - dst_offset += (line_count * dst_pitch); - } - BEGIN_RING(chan, m2mf, - NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); - OUT_RING (chan, width * cpp); - OUT_RING (chan, line_count); - OUT_RING (chan, 0x00000101); - OUT_RING (chan, 0); - FIRE_RING (chan); - - height -= line_count; - sy += line_count; - dy += line_count; - } + struct nouveau_channel *chan = nouveau_screen(pscreen)->channel; + const int cpp = dst->cpp; + uint32_t src_ofst = src->base; + uint32_t dst_ofst = dst->base; + uint32_t height = nblocksy; + uint32_t sy = src->y; + uint32_t dy = dst->y; + + assert(dst->cpp == src->cpp); + + if (nouveau_bo_tile_layout(src->bo)) { + BEGIN_RING(chan, RING_MF(LINEAR_IN), 6); + OUT_RING (chan, 0); + OUT_RING (chan, src->tile_mode << 4); + OUT_RING (chan, src->width * cpp); + OUT_RING (chan, src->height); + OUT_RING (chan, src->depth); + OUT_RING (chan, src->z); + } else { + src_ofst += src->y * src->pitch + src->x * cpp; + + BEGIN_RING(chan, RING_MF(LINEAR_IN), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_MF_(NV04_M2MF_PITCH_IN), 1); + OUT_RING (chan, src->pitch); + } + + if (nouveau_bo_tile_layout(dst->bo)) { + BEGIN_RING(chan, 
RING_MF(LINEAR_OUT), 6); + OUT_RING (chan, 0); + OUT_RING (chan, dst->tile_mode << 4); + OUT_RING (chan, dst->width * cpp); + OUT_RING (chan, dst->height); + OUT_RING (chan, dst->depth); + OUT_RING (chan, dst->z); + } else { + dst_ofst += dst->y * dst->pitch + dst->x * cpp; + + BEGIN_RING(chan, RING_MF(LINEAR_OUT), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_MF_(NV04_M2MF_PITCH_OUT), 1); + OUT_RING (chan, dst->pitch); + } + + while (height) { + int line_count = height > 2047 ? 2047 : height; + + MARK_RING (chan, 17, 4); + + BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); + OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD); + OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR); + + BEGIN_RING(chan, RING_MF_(NV04_M2MF_OFFSET_IN), 2); + OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD); + OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR); + + if (nouveau_bo_tile_layout(src->bo)) { + BEGIN_RING(chan, RING_MF(TILING_POSITION_IN), 1); + OUT_RING (chan, (sy << 16) | (src->x * cpp)); + } else { + src_ofst += line_count * src->pitch; + } + if (nouveau_bo_tile_layout(dst->bo)) { + BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT), 1); + OUT_RING (chan, (dy << 16) | (dst->x * cpp)); + } else { + dst_ofst += line_count * dst->pitch; + } + + BEGIN_RING(chan, RING_MF_(NV04_M2MF_LINE_LENGTH_IN), 4); + OUT_RING (chan, nblocksx * cpp); + OUT_RING (chan, line_count); + OUT_RING (chan, (1 << 8) | (1 << 0)); + OUT_RING (chan, 0); + + height -= line_count; + sy += line_count; + dy += line_count; + } } -struct pipe_transfer * -nv50_miptree_transfer_new(struct pipe_context *pcontext, - struct pipe_resource *pt, - unsigned level, - unsigned usage, - const struct pipe_box *box) +void +nv50_sifc_linear_u8(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, void *data) { - struct pipe_screen *pscreen = pcontext->screen; - struct nouveau_device *dev = nouveau_screen(pscreen)->device; 
- struct nv50_miptree *mt = nv50_miptree(pt); - struct nv50_miptree_level *lvl = &mt->level[level]; - struct nv50_transfer *tx; - unsigned nx, ny, image = 0, boxz = 0; - int ret; - - /* XXX can't unify these here? */ - if (pt->target == PIPE_TEXTURE_CUBE) - image = box->z; - else if (pt->target == PIPE_TEXTURE_3D) - boxz = box->z; - - tx = CALLOC_STRUCT(nv50_transfer); - if (!tx) - return NULL; - - /* Don't handle 3D transfers yet. - */ - assert(box->depth == 1); - - - pipe_resource_reference(&tx->base.resource, pt); - tx->base.level = level; - tx->base.usage = usage; - tx->base.box = *box; - tx->nblocksx = util_format_get_nblocksx(pt->format, u_minify(pt->width0, level)); - tx->nblocksy = util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)); - tx->base.stride = tx->nblocksx * util_format_get_blocksize(pt->format); - tx->base.usage = usage; - - tx->level_pitch = lvl->pitch; - tx->level_width = u_minify(mt->base.base.width0, level); - tx->level_height = u_minify(mt->base.base.height0, level); - tx->level_depth = u_minify(mt->base.base.depth0, level); - tx->level_offset = lvl->image_offset[image]; - tx->level_tiling = lvl->tile_mode; - tx->level_z = boxz; - tx->level_x = util_format_get_nblocksx(pt->format, box->x); - tx->level_y = util_format_get_nblocksy(pt->format, box->y); - ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - tx->nblocksy * tx->base.stride, &tx->bo); - if (ret) { - FREE(tx); - return NULL; - } - - if (usage & PIPE_TRANSFER_READ) { - nx = util_format_get_nblocksx(pt->format, box->width); - ny = util_format_get_nblocksy(pt->format, box->height); - - nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset, - tx->level_pitch, tx->level_tiling, - box->x, box->y, boxz, - tx->nblocksx, tx->nblocksy, - tx->level_depth, - tx->bo, 0, - tx->base.stride, tx->bo->tile_mode, - 0, 0, 0, - tx->nblocksx, tx->nblocksy, 1, - util_format_get_blocksize(pt->format), nx, ny, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART, - NOUVEAU_BO_GART); - } 
- - return &tx->base; + struct nouveau_channel *chan = nv->screen->channel; + uint32_t *src = (uint32_t *)data; + unsigned count = (size + 3) / 4; + unsigned xcoord = offset & 0xff; + + offset &= ~0xff; + + MARK_RING (chan, 23, 4); + BEGIN_RING(chan, RING_2D(DST_FORMAT), 2); + OUT_RING (chan, NV50_SURFACE_FORMAT_R8_UNORM); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_2D(DST_PITCH), 5); + OUT_RING (chan, 262144); + OUT_RING (chan, 65536); + OUT_RING (chan, 1); + OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR); + BEGIN_RING(chan, RING_2D(SIFC_BITMAP_ENABLE), 2); + OUT_RING (chan, 0); + OUT_RING (chan, NV50_SURFACE_FORMAT_R8_UNORM); + BEGIN_RING(chan, RING_2D(SIFC_WIDTH), 10); + OUT_RING (chan, size); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, xcoord); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + + while (count) { + unsigned nr = AVAIL_RING(chan); + + if (nr < 9) { + FIRE_RING(chan); + nouveau_bo_validate(chan, dst, NOUVEAU_BO_WR); + continue; + } + nr = MIN2(count, nr - 1); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_RING_NI(chan, RING_2D(SIFC_DATA), nr); + OUT_RINGp (chan, src, nr); + + src += nr; + count -= nr; + } } void -nv50_miptree_transfer_del(struct pipe_context *pcontext, - struct pipe_transfer *ptx) +nv50_m2mf_copy_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) { - struct nv50_transfer *tx = (struct nv50_transfer *)ptx; - struct nv50_miptree *mt = nv50_miptree(ptx->resource); - struct pipe_resource *pt = ptx->resource; - - unsigned nx = util_format_get_nblocksx(pt->format, tx->base.box.width); - unsigned ny = util_format_get_nblocksy(pt->format, tx->base.box.height); - - if (ptx->usage & PIPE_TRANSFER_WRITE) { - struct pipe_screen *pscreen = 
pcontext->screen; - - nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, - tx->base.stride, tx->bo->tile_mode, - 0, 0, 0, - tx->nblocksx, tx->nblocksy, 1, - mt->base.bo, tx->level_offset, - tx->level_pitch, tx->level_tiling, - tx->level_x, tx->level_y, tx->level_z, - tx->nblocksx, tx->nblocksy, - tx->level_depth, - util_format_get_blocksize(pt->format), nx, ny, - NOUVEAU_BO_GART, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART); - } - - nouveau_bo_ref(NULL, &tx->bo); - pipe_resource_reference(&ptx->resource, NULL); - FREE(ptx); + struct nouveau_channel *chan = nv->screen->channel; + + BEGIN_RING(chan, RING_MF(LINEAR_IN), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_MF(LINEAR_OUT), 1); + OUT_RING (chan, 1); + + while (size) { + unsigned bytes = MIN2(size, 1 << 17); + + MARK_RING (chan, 11, 4); + BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); + OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); + OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); + BEGIN_RING(chan, RING_MF_(NV04_M2MF_OFFSET_IN), 2); + OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); + OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); + BEGIN_RING(chan, RING_MF_(NV04_M2MF_LINE_LENGTH_IN), 4); + OUT_RING (chan, bytes); + OUT_RING (chan, 1); + OUT_RING (chan, (1 << 8) | (1 << 0)); + OUT_RING (chan, 0); + + srcoff += bytes; + dstoff += bytes; + size -= bytes; + } } -void * -nv50_miptree_transfer_map(struct pipe_context *pcontext, - struct pipe_transfer *ptx) +struct pipe_transfer * +nv50_miptree_transfer_new(struct pipe_context *pctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box) { - struct nv50_transfer *tx = (struct nv50_transfer *)ptx; - unsigned flags = 0; - int ret; - - if (tx->map_refcnt++) - return tx->bo->map; - - if (ptx->usage & PIPE_TRANSFER_WRITE) - flags |= NOUVEAU_BO_WR; - if (ptx->usage & PIPE_TRANSFER_READ) - flags |= NOUVEAU_BO_RD; - - ret = nouveau_bo_map(tx->bo, flags); - if (ret) { - tx->map_refcnt = 0; - return NULL; - } - return 
tx->bo->map; + struct nv50_context *nv50 = nv50_context(pctx); + struct pipe_screen *pscreen = pctx->screen; + struct nouveau_device *dev = nv50->screen->base.device; + struct nv50_miptree *mt = nv50_miptree(res); + struct nv50_miptree_level *lvl = &mt->level[level]; + struct nv50_transfer *tx; + uint32_t size; + uint32_t w, h, d, z, layer; + int ret; + + if (mt->layout_3d) { + z = box->z; + d = u_minify(res->depth0, level); + layer = 0; + } else { + z = 0; + d = 1; + layer = box->z; + } + + tx = CALLOC_STRUCT(nv50_transfer); + if (!tx) + return NULL; + + pipe_resource_reference(&tx->base.resource, res); + + tx->base.level = level; + tx->base.usage = usage; + tx->base.box = *box; + + tx->nblocksx = util_format_get_nblocksx(res->format, box->width); + tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + + tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); + tx->base.layer_stride = tx->nblocksy * tx->base.stride; + + w = u_minify(res->width0, level); + h = u_minify(res->height0, level); + + tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format); + + tx->rect[0].bo = mt->base.bo; + tx->rect[0].base = lvl->offset + layer * mt->layer_stride; + tx->rect[0].tile_mode = lvl->tile_mode; + tx->rect[0].x = util_format_get_nblocksx(res->format, box->x); + tx->rect[0].y = util_format_get_nblocksy(res->format, box->y); + tx->rect[0].z = z; + tx->rect[0].width = util_format_get_nblocksx(res->format, w); + tx->rect[0].height = util_format_get_nblocksy(res->format, h); + tx->rect[0].depth = d; + tx->rect[0].pitch = lvl->pitch; + tx->rect[0].domain = NOUVEAU_BO_VRAM; + + size = tx->base.layer_stride; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, + size * tx->base.box.depth, &tx->rect[1].bo); + if (ret) { + FREE(tx); + return NULL; + } + + tx->rect[1].width = tx->nblocksx; + tx->rect[1].height = tx->nblocksy; + tx->rect[1].depth = 1; + tx->rect[1].pitch = tx->base.stride; + tx->rect[1].domain = 
NOUVEAU_BO_GART; + + if (usage & PIPE_TRANSFER_READ) { + unsigned base = tx->rect[0].base; + unsigned i; + for (i = 0; i < box->depth; ++i) { + nv50_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += size; + } + tx->rect[0].z = z; + tx->rect[0].base = base; + tx->rect[1].base = 0; + } + + return &tx->base; } void -nv50_miptree_transfer_unmap(struct pipe_context *pcontext, - struct pipe_transfer *ptx) +nv50_miptree_transfer_del(struct pipe_context *pctx, + struct pipe_transfer *transfer) { - struct nv50_transfer *tx = (struct nv50_transfer *)ptx; - - if (--tx->map_refcnt) - return; - nouveau_bo_unmap(tx->bo); + struct pipe_screen *pscreen = pctx->screen; + struct nv50_transfer *tx = (struct nv50_transfer *)transfer; + struct nv50_miptree *mt = nv50_miptree(tx->base.resource); + unsigned i; + + if (tx->base.usage & PIPE_TRANSFER_WRITE) { + for (i = 0; i < tx->base.box.depth; ++i) { + nv50_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += tx->nblocksy * tx->base.stride; + } + } + + nouveau_bo_ref(NULL, &tx->rect[1].bo); + pipe_resource_reference(&transfer->resource, NULL); + + FREE(tx); } +void * +nv50_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct nv50_transfer *tx = (struct nv50_transfer *)transfer; + int ret; + unsigned flags = 0; + + if (tx->rect[1].bo->map) + return tx->rect[1].bo->map; + + if (transfer->usage & PIPE_TRANSFER_READ) + flags = NOUVEAU_BO_RD; + if (transfer->usage & PIPE_TRANSFER_WRITE) + flags |= NOUVEAU_BO_WR; + + ret = nouveau_bo_map(tx->rect[1].bo, flags); + if (ret) + return NULL; + return tx->rect[1].bo->map; +} void -nv50_upload_sifc(struct nv50_context *nv50, - struct nouveau_bo *bo, unsigned dst_offset, 
unsigned reloc, - unsigned dst_format, int dst_w, int dst_h, int dst_pitch, - void *src, unsigned src_format, int src_pitch, - int x, int y, int w, int h, int cpp) +nv50_miptree_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) { - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *eng2d = nv50->screen->eng2d; - unsigned line_dwords = (w * cpp + 3) / 4; - - reloc |= NOUVEAU_BO_WR; - - MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */ - - if (nouveau_bo_tile_layout(bo)) { - BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5); - OUT_RING (chan, dst_format); - OUT_RING (chan, 0); - OUT_RING (chan, bo->tile_mode << 4); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - } else { - BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2); - OUT_RING (chan, dst_format); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1); - OUT_RING (chan, dst_pitch); - } - - BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 4); - OUT_RING (chan, dst_w); - OUT_RING (chan, dst_h); - OUT_RELOCh(chan, bo, dst_offset, reloc); - OUT_RELOCl(chan, bo, dst_offset, reloc); - - /* NV50_2D_OPERATION_SRCCOPY assumed already set */ - - BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); - OUT_RING (chan, 0); - OUT_RING (chan, src_format); - BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); - OUT_RING (chan, w); - OUT_RING (chan, h); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, x); - OUT_RING (chan, 0); - OUT_RING (chan, y); - - while (h--) { - const uint32_t *p = src; - unsigned count = line_dwords; - - while (count) { - unsigned nr = MIN2(count, 1792); - - if (AVAIL_RING(chan) <= nr) { - FIRE_RING (chan); - - BEGIN_RING(chan, eng2d, - NV50_2D_DST_ADDRESS_HIGH, 2); - OUT_RELOCh(chan, bo, dst_offset, reloc); - OUT_RELOCl(chan, bo, dst_offset, reloc); - } - assert(AVAIL_RING(chan) > nr); - - BEGIN_RING(chan, eng2d, - NV50_2D_SIFC_DATA | (2 << 29), nr); - 
OUT_RINGp (chan, p, nr); - - p += nr; - count -= nr; - } - - src = (uint8_t *) src + src_pitch; - } + struct nv50_transfer *tx = (struct nv50_transfer *)transfer; + + nouveau_bo_unmap(tx->rect[1].bo); } + diff --git a/src/gallium/drivers/nv50/nv50_transfer.h b/src/gallium/drivers/nv50/nv50_transfer.h index 6699bf546e..d3259ef4a5 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.h +++ b/src/gallium/drivers/nv50/nv50_transfer.h @@ -1,31 +1,38 @@ -#ifndef NV50_TRANSFER_H -#define NV50_TRANSFER_H +#ifndef __NV50_TRANSFER_H__ +#define __NV50_TRANSFER_H__ #include "pipe/p_state.h" - struct pipe_transfer * nv50_miptree_transfer_new(struct pipe_context *pcontext, - struct pipe_resource *pt, - unsigned level, - unsigned usage, - const struct pipe_box *box); + struct pipe_resource *pt, + unsigned level, + unsigned usage, + const struct pipe_box *box); void nv50_miptree_transfer_del(struct pipe_context *pcontext, - struct pipe_transfer *ptx); + struct pipe_transfer *ptx); void * nv50_miptree_transfer_map(struct pipe_context *pcontext, - struct pipe_transfer *ptx); + struct pipe_transfer *ptx); void nv50_miptree_transfer_unmap(struct pipe_context *pcontext, - struct pipe_transfer *ptx); + struct pipe_transfer *ptx); -extern void -nv50_upload_sifc(struct nv50_context *nv50, - struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc, - unsigned dst_format, int dst_w, int dst_h, int dst_pitch, - void *src, unsigned src_format, int src_pitch, - int x, int y, int w, int h, int cpp); +struct nv50_m2mf_rect { + struct nouveau_bo *bo; + uint32_t base; + unsigned domain; + uint32_t pitch; + uint32_t width; + uint32_t x; + uint32_t height; + uint32_t y; + uint16_t depth; + uint16_t z; + uint16_t tile_mode; + uint16_t cpp; +}; #endif diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index d41a59d05d..abdb9ce2f9 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Ben 
Skeggs + * Copyright 2010 Christoph Bumiller * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -24,540 +24,694 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" -#include "util/u_split_prim.h" +#include "translate/translate.h" #include "nv50_context.h" #include "nv50_resource.h" -struct instance { - struct nouveau_bo *bo; - unsigned delta; - unsigned stride; - unsigned step; - unsigned divisor; -}; +#include "nv50_3d.xml.h" -static void -instance_init(struct nv50_context *nv50, struct instance *a, unsigned first) +void +nv50_vertex_state_delete(struct pipe_context *pipe, + void *hwcso) { - int i; - - for (i = 0; i < nv50->vtxelt->num_elements; i++) { - struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i]; - struct pipe_vertex_buffer *vb; - - a[i].divisor = ve->instance_divisor; - if (a[i].divisor) { - vb = &nv50->vtxbuf[ve->vertex_buffer_index]; - - a[i].bo = nv50_resource(vb->buffer)->bo; - a[i].stride = vb->stride; - a[i].step = first % a[i].divisor; - a[i].delta = vb->buffer_offset + ve->src_offset + - (first * a[i].stride); - } - } + struct nv50_vertex_stateobj *so = hwcso; + + if (so->translate) + so->translate->release(so->translate); + FREE(hwcso); } -static void -instance_step(struct nv50_context *nv50, struct instance *a) +void * +nv50_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) { - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - int i; - - for (i = 0; i < nv50->vtxelt->num_elements; i++) { - if (!a[i].divisor) - continue; - - BEGIN_RING(chan, tesla, - NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); - OUT_RELOCh(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD | - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); - OUT_RELOCl(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD | - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); 
- if (++a[i].step == a[i].divisor) { - a[i].step = 0; - a[i].delta += a[i].stride; - } - } + struct nv50_vertex_stateobj *so; + struct translate_key transkey; + unsigned i; + + so = MALLOC(sizeof(*so) + + num_elements * sizeof(struct nv50_vertex_element)); + if (!so) + return NULL; + so->num_elements = num_elements; + so->instance_elts = 0; + so->instance_bufs = 0; + so->need_conversion = FALSE; + + transkey.nr_elements = 0; + transkey.output_stride = 0; + + for (i = 0; i < num_elements; ++i) { + const struct pipe_vertex_element *ve = &elements[i]; + const unsigned vbi = ve->vertex_buffer_index; + enum pipe_format fmt = ve->src_format; + + so->element[i].pipe = elements[i]; + so->element[i].state = nv50_format_table[fmt].vtx; + + if (!so->element[i].state) { + switch (util_format_get_nr_components(fmt)) { + case 1: fmt = PIPE_FORMAT_R32_FLOAT; break; + case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break; + case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break; + case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break; + default: + assert(0); + return NULL; + } + so->element[i].state = nv50_format_table[fmt].vtx; + so->need_conversion = TRUE; + } + so->element[i].state |= i; + + if (1) { + unsigned j = transkey.nr_elements++; + + transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL; + transkey.element[j].input_format = ve->src_format; + transkey.element[j].input_buffer = vbi; + transkey.element[j].input_offset = ve->src_offset; + transkey.element[j].instance_divisor = ve->instance_divisor; + + transkey.element[j].output_format = fmt; + transkey.element[j].output_offset = transkey.output_stride; + transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3; + + if (unlikely(ve->instance_divisor)) { + so->instance_elts |= 1 << i; + so->instance_bufs |= 1 << vbi; + } + } + } + + so->translate = translate_create(&transkey); + so->vertex_size = transkey.output_stride / 4; + so->packet_vertex_limit = NV04_PFIFO_MAX_PACKET_LEN / + MAX2(so->vertex_size, 1); + + return so; } 
+#define NV50_3D_VERTEX_ATTRIB_INACTIVE \ + NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT | \ + NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 | \ + NV50_3D_VERTEX_ARRAY_ATTRIB_CONST + static void -nv50_draw_arrays_instanced(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count, - unsigned startInstance, unsigned instanceCount) +nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb, + struct pipe_vertex_element *ve, unsigned attr) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct instance a[16]; - unsigned prim = nv50_prim(mode); - - instance_init(nv50, a, startInstance); - if (!nv50_state_validate(nv50, 10 + 16*3)) - return; - - if (nv50->vbo_fifo) { - nv50_push_elements_instanced(pipe, NULL, 0, 0, mode, start, - count, startInstance, - instanceCount); - return; - } - - BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); - OUT_RING (chan, NV50_CB_AUX | (24 << 8)); - OUT_RING (chan, startInstance); - while (instanceCount--) { - if (AVAIL_RING(chan) < (7 + 16*3)) { - FIRE_RING(chan); - if (!nv50_state_validate(nv50, 7 + 16*3)) { - assert(0); - return; - } - } - instance_step(nv50, a); - - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, prim); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); - OUT_RING (chan, start); - OUT_RING (chan, count); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (chan, 0); - - prim |= (1 << 28); - } + const void *data; + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nv04_resource *res = nv04_resource(vb->buffer); + float v[4]; + const unsigned nc = util_format_get_nr_components(ve->src_format); + + data = nouveau_resource_map_offset(&nv50->base, res, vb->buffer_offset + + ve->src_offset, NOUVEAU_BO_RD); + + util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1); + + switch (nc) { + case 4: + BEGIN_RING(chan, 
RING_3D(VTX_ATTR_4F_X(attr)), 4); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + OUT_RINGf (chan, v[2]); + OUT_RINGf (chan, v[3]); + break; + case 3: + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(attr)), 3); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + OUT_RINGf (chan, v[2]); + break; + case 2: + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(attr)), 2); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + break; + case 1: + if (attr == nv50->vertprog->vp.edgeflag) { + BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1); + OUT_RING (chan, v[0] ? 1 : 0); + } + BEGIN_RING(chan, RING_3D(VTX_ATTR_1F(attr)), 1); + OUT_RINGf (chan, v[0]); + break; + default: + assert(0); + break; + } } -struct inline_ctx { - struct nv50_context *nv50; - void *map; -}; +static INLINE void +nv50_vbuf_range(struct nv50_context *nv50, int vbi, + uint32_t *base, uint32_t *size) +{ + if (unlikely(nv50->vertex->instance_bufs & (1 << vbi))) { + /* TODO: use min and max instance divisor to get a proper range */ + *base = 0; + *size = nv50->vtxbuf[vbi].buffer->width0; + } else { + assert(nv50->vbo_max_index != ~0); + *base = nv50->vbo_min_index * nv50->vtxbuf[vbi].stride; + *size = (nv50->vbo_max_index - + nv50->vbo_min_index + 1) * nv50->vtxbuf[vbi].stride; + } +} static void -inline_elt08(void *priv, unsigned start, unsigned count) +nv50_prevalidate_vbufs(struct nv50_context *nv50) { - struct inline_ctx *ctx = priv; - struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; - uint8_t *map = (uint8_t *)ctx->map + start; - - if (count & 1) { - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, map[0]); - map++; - count &= ~1; - } - - count >>= 1; - if (!count) - return; - - BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count); - while (count--) { - OUT_RING(chan, (map[1] << 16) | map[0]); - map += 2; - } + struct pipe_vertex_buffer *vb; + struct nv04_resource *buf; + int i; + uint32_t base, size; + + nv50->vbo_fifo = 
nv50->vbo_user = 0; + + nv50_bufctx_reset(nv50, NV50_BUFCTX_VERTEX); + + for (i = 0; i < nv50->num_vtxbufs; ++i) { + vb = &nv50->vtxbuf[i]; + if (!vb->stride) + continue; + buf = nv04_resource(vb->buffer); + + /* NOTE: user buffers with temporary storage count as mapped by GPU */ + if (!nouveau_resource_mapped_by_gpu(vb->buffer)) { + if (nv50->vbo_push_hint) { + nv50->vbo_fifo = ~0; + continue; + } else { + if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) { + nv50->vbo_user |= 1 << i; + assert(vb->stride > vb->buffer_offset); + nv50_vbuf_range(nv50, i, &base, &size); + nouveau_user_buffer_upload(buf, base, size); + } else { + nouveau_buffer_migrate(&nv50->base, buf, NOUVEAU_BO_GART); + } + nv50->base.vbo_dirty = TRUE; + } + } + nv50_bufctx_add_resident(nv50, NV50_BUFCTX_VERTEX, buf, NOUVEAU_BO_RD); + nouveau_buffer_adjust_score(&nv50->base, buf, 1); + } } static void -inline_elt16(void *priv, unsigned start, unsigned count) +nv50_update_user_vbufs(struct nv50_context *nv50) { - struct inline_ctx *ctx = priv; - struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; - uint16_t *map = (uint16_t *)ctx->map + start; - - if (count & 1) { - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, map[0]); - count &= ~1; - map++; - } - - count >>= 1; - if (!count) - return; - - BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count); - while (count--) { - OUT_RING(chan, (map[1] << 16) | map[0]); - map += 2; - } + struct nouveau_channel *chan = nv50->screen->base.channel; + uint32_t base, offset, size; + int i; + uint32_t written = 0; + + for (i = 0; i < nv50->vertex->num_elements; ++i) { + struct pipe_vertex_element *ve = &nv50->vertex->element[i].pipe; + const int b = ve->vertex_buffer_index; + struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b]; + struct nv04_resource *buf = nv04_resource(vb->buffer); + + if (!(nv50->vbo_user & (1 << b))) + continue; + + if (!vb->stride) { + nv50_emit_vtxattr(nv50, vb, ve, 
i); + continue; + } + nv50_vbuf_range(nv50, b, &base, &size); + + if (!(written & (1 << b))) { + written |= 1 << b; + nouveau_user_buffer_upload(buf, base, size); + } + offset = vb->buffer_offset + ve->src_offset; + + MARK_RING (chan, 6, 4); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + OUT_RESRCh(chan, buf, base + size - 1, NOUVEAU_BO_RD); + OUT_RESRCl(chan, buf, base + size - 1, NOUVEAU_BO_RD); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_START_HIGH(i)), 2); + OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD); + OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD); + } + nv50->base.vbo_dirty = TRUE; } -static void -inline_elt32(void *priv, unsigned start, unsigned count) +static INLINE void +nv50_release_user_vbufs(struct nv50_context *nv50) { - struct inline_ctx *ctx = priv; - struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; + uint32_t vbo_user = nv50->vbo_user; - BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count); - OUT_RINGp (chan, (uint32_t *)ctx->map + start, count); + while (vbo_user) { + int i = ffs(vbo_user) - 1; + vbo_user &= ~(1 << i); + + nouveau_buffer_release_gpu_storage(nv04_resource(nv50->vtxbuf[i].buffer)); + } } -static void -inline_edgeflag(void *priv, boolean enabled) +void +nv50_vertex_arrays_validate(struct nv50_context *nv50) { - struct inline_ctx *ctx = priv; - struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nv50_vertex_stateobj *vertex = nv50->vertex; + struct pipe_vertex_buffer *vb; + struct nv50_vertex_element *ve; + unsigned i; + + if (unlikely(vertex->need_conversion)) { + nv50->vbo_fifo = ~0; + nv50->vbo_user = 0; + } else { + nv50_prevalidate_vbufs(nv50); + } + + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_ATTRIB(0)), vertex->num_elements); + for (i = 0; i < vertex->num_elements; ++i) { + ve = &vertex->element[i]; + vb = 
&nv50->vtxbuf[ve->pipe.vertex_buffer_index]; + + if (likely(vb->stride) || nv50->vbo_fifo) { + OUT_RING(chan, ve->state); + } else { + OUT_RING(chan, ve->state | NV50_3D_VERTEX_ARRAY_ATTRIB_CONST); + nv50->vbo_fifo &= ~(1 << i); + } + } + + for (i = 0; i < vertex->num_elements; ++i) { + struct nv04_resource *res; + unsigned size, offset; + + ve = &vertex->element[i]; + vb = &nv50->vtxbuf[ve->pipe.vertex_buffer_index]; + + if (unlikely(ve->pipe.instance_divisor)) { + if (!(nv50->state.instance_elts & (1 << i))) { + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); + OUT_RING (chan, 1); + } + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1); + OUT_RING (chan, ve->pipe.instance_divisor); + } else + if (unlikely(nv50->state.instance_elts & (1 << i))) { + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); + OUT_RING (chan, 0); + } + + res = nv04_resource(vb->buffer); + + if (nv50->vbo_fifo || unlikely(vb->stride == 0)) { + if (!nv50->vbo_fifo) + nv50_emit_vtxattr(nv50, vb, &ve->pipe, i); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); + OUT_RING (chan, 0); + continue; + } + + size = vb->buffer->width0; + offset = ve->pipe.src_offset + vb->buffer_offset; + + MARK_RING (chan, 8, 4); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); + OUT_RING (chan, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + OUT_RESRCh(chan, res, size - 1, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, size - 1, NOUVEAU_BO_RD); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_START_HIGH(i)), 2); + OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); + } + for (; i < nv50->state.num_vtxelts; ++i) { + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_ATTRIB(i)), 1); + OUT_RING (chan, NV50_3D_VERTEX_ATTRIB_INACTIVE); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); + OUT_RING (chan, 0); + } + + nv50->state.num_vtxelts = vertex->num_elements; + nv50->state.instance_elts = 
vertex->instance_elts; +} + +#define NV50_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n - BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (chan, enabled ? 1 : 0); +static INLINE unsigned +nv50_prim_gl(unsigned prim) +{ + switch (prim) { + NV50_PRIM_GL_CASE(POINTS); + NV50_PRIM_GL_CASE(LINES); + NV50_PRIM_GL_CASE(LINE_LOOP); + NV50_PRIM_GL_CASE(LINE_STRIP); + NV50_PRIM_GL_CASE(TRIANGLES); + NV50_PRIM_GL_CASE(TRIANGLE_STRIP); + NV50_PRIM_GL_CASE(TRIANGLE_FAN); + NV50_PRIM_GL_CASE(QUADS); + NV50_PRIM_GL_CASE(QUAD_STRIP); + NV50_PRIM_GL_CASE(POLYGON); + NV50_PRIM_GL_CASE(LINES_ADJACENCY); + NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + default: + return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } } static void -nv50_draw_elements_inline(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, unsigned indexSize, - unsigned mode, unsigned start, unsigned count, - unsigned startInstance, unsigned instanceCount) +nv50_draw_vbo_flush_notify(struct nouveau_channel *chan) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct pipe_transfer *transfer; - struct instance a[16]; - struct inline_ctx ctx; - struct util_split_prim s; - boolean nzi = FALSE; - unsigned overhead; - - overhead = 16*3; /* potential instance adjustments */ - overhead += 4; /* Begin()/End() */ - overhead += 4; /* potential edgeflag disable/reenable */ - overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */ - - s.priv = &ctx; - if (indexSize == 1) - s.emit = inline_elt08; - else - if (indexSize == 2) - s.emit = inline_elt16; - else - s.emit = inline_elt32; - s.edge = inline_edgeflag; - - ctx.nv50 = nv50; - ctx.map = pipe_buffer_map(pipe, indexBuffer, PIPE_TRANSFER_READ, &transfer); - assert(ctx.map); - if (!ctx.map) - 
return; - - instance_init(nv50, a, startInstance); - if (!nv50_state_validate(nv50, overhead + 6 + 3)) - return; - - BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); - OUT_RING (chan, NV50_CB_AUX | (24 << 8)); - OUT_RING (chan, startInstance); - while (instanceCount--) { - unsigned max_verts; - boolean done; - - util_split_prim_init(&s, mode, start, count); - do { - if (AVAIL_RING(chan) < (overhead + 6)) { - FIRE_RING(chan); - if (!nv50_state_validate(nv50, (overhead + 6))) { - assert(0); - return; - } - } - - max_verts = AVAIL_RING(chan) - overhead; - if (max_verts > 2047) - max_verts = 2047; - if (indexSize != 4) - max_verts <<= 1; - instance_step(nv50, a); - - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0)); - done = util_split_prim_next(&s, max_verts); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (chan, 0); - } while (!done); - - nzi = TRUE; - } - - pipe_buffer_unmap(pipe, indexBuffer, transfer); + struct nv50_context *nv50 = chan->user_private; + + nouveau_fence_update(&nv50->screen->base, TRUE); + + nv50_bufctx_emit_relocs(nv50); } static void -nv50_draw_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count, - unsigned startInstance, unsigned instanceCount) +nv50_draw_arrays(struct nv50_context *nv50, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct instance a[16]; - unsigned prim = nv50_prim(mode); - - instance_init(nv50, a, startInstance); - if (!nv50_state_validate(nv50, 13 + 16*3)) - return; - - if (nv50->vbo_fifo) { - nv50_push_elements_instanced(pipe, indexBuffer, indexSize, - indexBias, mode, start, count, - startInstance, instanceCount); - return; - } - - /* indices 
are uint32 internally, so large indexBias means negative */ - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_BASE, 1); - OUT_RING (chan, indexBias); - - if (!nv50_resource_mapped_by_gpu(indexBuffer) || indexSize == 1) { - nv50_draw_elements_inline(pipe, indexBuffer, indexSize, - mode, start, count, startInstance, - instanceCount); - return; - } - - BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); - OUT_RING (chan, NV50_CB_AUX | (24 << 8)); - OUT_RING (chan, startInstance); - while (instanceCount--) { - if (AVAIL_RING(chan) < (7 + 16*3)) { - FIRE_RING(chan); - if (!nv50_state_validate(nv50, 10 + 16*3)) { - assert(0); - return; - } - } - instance_step(nv50, a); - - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, prim); - if (indexSize == 4) { - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0); - OUT_RING (chan, count); - nouveau_pushbuf_submit(chan, - nv50_resource(indexBuffer)->bo, - start << 2, count << 2); - } else - if (indexSize == 2) { - unsigned vb_start = (start & ~1); - unsigned vb_end = (start + count + 1) & ~1; - unsigned dwords = (vb_end - vb_start) >> 1; - - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1); - OUT_RING (chan, ((start & 1) << 31) | count); - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0); - OUT_RING (chan, dwords); - nouveau_pushbuf_submit(chan, - nv50_resource(indexBuffer)->bo, - vb_start << 1, dwords << 2); - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1); - OUT_RING (chan, 0); - } - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (chan, 0); - - prim |= (1 << 28); - } + struct nouveau_channel *chan = nv50->screen->base.channel; + unsigned prim; + + chan->flush_notify = nv50_draw_vbo_flush_notify; + chan->user_private = nv50; + + prim = nv50_prim_gl(mode); + + while (instance_count--) { + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, prim); + BEGIN_RING(chan, RING_3D(VERTEX_BUFFER_FIRST), 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + 
BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1); + OUT_RING (chan, 0); + + prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + + chan->flush_notify = nv50_default_flush_notify; } -void -nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +static void +nv50_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map, + unsigned start, unsigned count) { - struct nv50_context *nv50 = nv50_context(pipe); - - if (info->indexed && nv50->idxbuf.buffer) { - unsigned offset; - - assert(nv50->idxbuf.offset % nv50->idxbuf.index_size == 0); - offset = nv50->idxbuf.offset / nv50->idxbuf.index_size; - - nv50_draw_elements_instanced(pipe, - nv50->idxbuf.buffer, - nv50->idxbuf.index_size, - info->index_bias, - info->mode, - info->start + offset, - info->count, - info->start_instance, - info->instance_count); - } - else { - nv50_draw_arrays_instanced(pipe, - info->mode, - info->start, - info->count, - info->start_instance, - info->instance_count); - } + map += start; + + if (count & 3) { + unsigned i; + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), count & 3); + for (i = 0; i < (count & 3); ++i) + OUT_RING(chan, *map++); + count &= ~3; + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4; + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr); + for (i = 0; i < nr; ++i) { + OUT_RING(chan, + (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]); + map += 4; + } + count -= nr * 4; + } } -static INLINE boolean -nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, - struct nouveau_stateobj **pso, - struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb) - +static void +nv50_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map, + unsigned start, unsigned count) { - struct nouveau_stateobj *so; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo; - float v[4]; - int ret; - unsigned nr_components = 
util_format_get_nr_components(ve->src_format); - - ret = nouveau_bo_map(bo, NOUVEAU_BO_RD); - if (ret) - return FALSE; - - util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map + - (vb->buffer_offset + ve->src_offset), 0, - 0, 0, 1, 1); - so = *pso; - if (!so) - *pso = so = so_new(nv50->vtxelt->num_elements, - nv50->vtxelt->num_elements * 4, 0); - - switch (nr_components) { - case 4: - so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - so_data (so, fui(v[3])); - break; - case 3: - so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 3); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - break; - case 2: - so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 2); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - break; - case 1: - if (attrib == nv50->vertprog->vp.edgeflag) { - so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); - so_data (so, v[0] ? 1 : 0); - } - so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1); - so_data (so, fui(v[0])); - break; - default: - nouveau_bo_unmap(bo); - return FALSE; - } - - nouveau_bo_unmap(bo); - return TRUE; + map += start; + + if (count & 1) { + count &= ~1; + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1); + OUT_RING (chan, *map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } } -void -nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso) +static void +nv50_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map, + unsigned start, unsigned count) { - unsigned i; + map += start; + + while (count) { + const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); - for (i = 0; i < cso->num_elements; ++i) - cso->hw[i] = nv50_format_table[cso->pipe[i].src_format].vtx; + BEGIN_RING_NI(chan, 
RING_3D(VB_ELEMENT_U32), nr); + OUT_RINGp (chan, map, nr); + + map += nr; + count -= nr; + } } -struct nouveau_stateobj * -nv50_vbo_validate(struct nv50_context *nv50) +static void +nv50_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map, + unsigned start, unsigned count) { - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr; - unsigned i, n_ve; - - /* don't validate if Gallium took away our buffers */ - if (nv50->vtxbuf_nr == 0) - return NULL; - - nv50->vbo_fifo = 0; - if (nv50->screen->force_push || - nv50->vertprog->vp.edgeflag < 16) - nv50->vbo_fifo = 0xffff; - - for (i = 0; i < nv50->vtxbuf_nr; i++) { - if (nv50->vtxbuf[i].stride && - !nv50_resource_mapped_by_gpu(nv50->vtxbuf[i].buffer)) - nv50->vbo_fifo = 0xffff; - } - - n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr); - - vtxattr = NULL; - vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt->num_elements * 4); - vtxfmt = so_new(1, n_ve, 0); - so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve); - - for (i = 0; i < nv50->vtxelt->num_elements; i++) { - struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i]; - struct pipe_vertex_buffer *vb = - &nv50->vtxbuf[ve->vertex_buffer_index]; - struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo; - uint32_t hw = nv50->vtxelt->hw[i]; - - if (!vb->stride && - nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) { - so_data(vtxfmt, hw | (1 << 4)); - - so_method(vtxbuf, tesla, - NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); - so_data (vtxbuf, 0); - - nv50->vbo_fifo &= ~(1 << i); - continue; - } - - if (nv50->vbo_fifo) { - so_data (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i)); - so_method(vtxbuf, tesla, - NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); - so_data (vtxbuf, 0); - continue; - } - - so_data(vtxfmt, hw | i); - - so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3); - so_data (vtxbuf, 0x20000000 | - (ve->instance_divisor ? 
0 : vb->stride)); - so_reloc (vtxbuf, bo, vb->buffer_offset + - ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (vtxbuf, bo, vb->buffer_offset + - ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - - /* vertex array limits */ - so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); - so_reloc (vtxbuf, bo, vb->buffer->width0 - 1, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); - so_reloc (vtxbuf, bo, vb->buffer->width0 - 1, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); - } - for (; i < n_ve; ++i) { - so_data (vtxfmt, 0x7e080010); - - so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); - so_data (vtxbuf, 0); - } - nv50->state.vtxelt_nr = nv50->vtxelt->num_elements; - - so_ref (vtxbuf, &nv50->state.vtxbuf); - so_ref (vtxattr, &nv50->state.vtxattr); - so_ref (NULL, &vtxbuf); - so_ref (NULL, &vtxattr); - return vtxfmt; + map += start; + + if (count & 1) { + count--; + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1); + OUT_RING (chan, *map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } } +static void +nv50_draw_elements(struct nv50_context *nv50, boolean shorten, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count, int32_t index_bias) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + void *data; + unsigned prim; + const unsigned index_size = nv50->idxbuf.index_size; + + chan->flush_notify = nv50_draw_vbo_flush_notify; + chan->user_private = nv50; + + prim = nv50_prim_gl(mode); + + if (index_bias != nv50->state.index_bias) { + BEGIN_RING(chan, RING_3D(VB_ELEMENT_BASE), 1); + OUT_RING (chan, index_bias); + nv50->state.index_bias = index_bias; + } + 
+ if (nouveau_resource_mapped_by_gpu(nv50->idxbuf.buffer)) { + struct nv04_resource *res = nv04_resource(nv50->idxbuf.buffer); + + start += nv50->idxbuf.offset >> (index_size >> 1); + + nouveau_buffer_adjust_score(&nv50->base, res, 1); + + while (instance_count--) { + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, mode); + + switch (index_size) { + case 4: + { + WAIT_RING (chan, 2); + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32) | 0x30000, 0); + OUT_RING (chan, count); + nouveau_pushbuf_submit(chan, res->bo, res->offset + start * 4, + count * 4); + } + break; + case 2: + { + unsigned pb_start = (start & ~1); + unsigned pb_words = (((start + count + 1) & ~1) - pb_start) >> 1; + + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U16_SETUP), 1); + OUT_RING (chan, (start << 31) | count); + WAIT_RING (chan, 2); + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U16) | 0x30000, 0); + OUT_RING (chan, pb_words); + nouveau_pushbuf_submit(chan, res->bo, res->offset + pb_start * 2, + pb_words * 4); + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U16_SETUP), 1); + OUT_RING (chan, 0); + break; + } + case 1: + { + unsigned pb_start = (start & ~3); + unsigned pb_words = (((start + count + 3) & ~3) - pb_start) >> 1; + + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U8_SETUP), 1); + OUT_RING (chan, (start << 30) | count); + WAIT_RING (chan, 2); + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U8) | 0x30000, 0); + OUT_RING (chan, pb_words); + nouveau_pushbuf_submit(chan, res->bo, res->offset + pb_start, + pb_words * 4); + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U8_SETUP), 1); + OUT_RING (chan, 0); + break; + } + default: + assert(0); + return; + } + BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1); + OUT_RING (chan, 0); + + nv50_resource_fence(res, NOUVEAU_BO_RD); + + mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + } else { + data = nouveau_resource_map_offset(&nv50->base, + nv04_resource(nv50->idxbuf.buffer), + nv50->idxbuf.offset, NOUVEAU_BO_RD); + if (!data) + return; + + while (instance_count--) { + BEGIN_RING(chan, 
RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, prim); + switch (index_size) { + case 1: + nv50_draw_elements_inline_u08(chan, data, start, count); + break; + case 2: + nv50_draw_elements_inline_u16(chan, data, start, count); + break; + case 4: + if (shorten) + nv50_draw_elements_inline_u32_short(chan, data, start, count); + else + nv50_draw_elements_inline_u32(chan, data, start, count); + break; + default: + assert(0); + return; + } + BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1); + OUT_RING (chan, 0); + + prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + } + + chan->flush_notify = nv50_default_flush_notify; +} +void +nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->base.channel; + + /* For picking only a few vertices from a large user buffer, push is better, + * if index count is larger and we expect repeated vertices, suggest upload. + */ + nv50->vbo_push_hint = /* the 64 is heuristic */ + !(info->indexed && + ((info->max_index - info->min_index + 64) < info->count)); + + nv50->vbo_min_index = info->min_index; + nv50->vbo_max_index = info->max_index; + + if (nv50->vbo_push_hint != !!nv50->vbo_fifo) + nv50->dirty |= NV50_NEW_ARRAYS; + + if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS))) + nv50_update_user_vbufs(nv50); + + nv50_state_validate(nv50); + + if (nv50->vbo_fifo) { + nv50_push_vbo(nv50, info); + return; + } + + if (nv50->state.instance_base != info->start_instance) { + nv50->state.instance_base = info->start_instance; + /* NOTE: this does not affect the shader input, should it ? 
*/ + BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1); + OUT_RING (chan, info->start_instance); + } + + if (nv50->base.vbo_dirty) { + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 1); + OUT_RING (chan, 0); + nv50->base.vbo_dirty = FALSE; + } + + if (!info->indexed) { + nv50_draw_arrays(nv50, + info->mode, info->start, info->count, + info->instance_count); + } else { + boolean shorten = info->max_index <= 65535; + + assert(nv50->idxbuf.buffer); + + if (info->primitive_restart != nv50->state.prim_restart) { + if (info->primitive_restart) { + BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2); + OUT_RING (chan, 1); + OUT_RING (chan, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } else { + BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 1); + OUT_RING (chan, 0); + } + nv50->state.prim_restart = info->primitive_restart; + } else + if (info->primitive_restart) { + BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1); + OUT_RING (chan, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } + + nv50_draw_elements(nv50, shorten, + info->mode, info->start, info->count, + info->instance_count, info->index_bias); + } + + nv50_release_user_vbufs(nv50); +} diff --git a/src/gallium/drivers/nv50/nv50_winsys.h b/src/gallium/drivers/nv50/nv50_winsys.h new file mode 100644 index 0000000000..afa2a00c7a --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_winsys.h @@ -0,0 +1,106 @@ + +#ifndef __NV50_WINSYS_H__ +#define __NV50_WINSYS_H__ + +#include <stdint.h> +#include <unistd.h> + +#include "pipe/p_defines.h" + +#include "nouveau/nouveau_bo.h" +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_grobj.h" +#include "nouveau/nouveau_device.h" +#include "nouveau/nouveau_resource.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_reloc.h" +#include "nouveau/nouveau_notifier.h" + +#include "nouveau/nouveau_buffer.h" + +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + +#define 
NV50_SUBCH_3D 5 +#define NV50_SUBCH_2D 6 +#define NV50_SUBCH_MF 7 + +#define NV50_MF_(n) NV50_M2MF_##n + +#define RING_3D(n) ((NV50_SUBCH_3D << 13) | NV50_3D_##n) +#define RING_2D(n) ((NV50_SUBCH_2D << 13) | NV50_2D_##n) +#define RING_MF(n) ((NV50_SUBCH_MF << 13) | NV50_MF_(n)) + +#define RING_3D_(m) ((NV50_SUBCH_3D << 13) | (m)) +#define RING_2D_(m) ((NV50_SUBCH_2D << 13) | (m)) +#define RING_MF_(m) ((NV50_SUBCH_MF << 13) | (m)) + +#define RING_GR(gr, m) (((gr)->subc << 13) | (m)) + +int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min); + +static inline uint32_t +nouveau_bo_tile_layout(struct nouveau_bo *bo) +{ + return bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK; +} + +static INLINE void +nouveau_bo_validate(struct nouveau_channel *chan, + struct nouveau_bo *bo, unsigned flags) +{ + nouveau_reloc_emit(chan, NULL, 0, NULL, bo, 0, 0, flags, 0, 0); +} + +/* incremental methods */ +static INLINE void +BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size) +{ + WAIT_RING(chan, size + 1); + OUT_RING (chan, (size << 18) | mthd); +} + +/* non-incremental */ +static INLINE void +BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size) +{ + WAIT_RING(chan, size + 1); + OUT_RING (chan, (0x2 << 29) | (size << 18) | mthd); +} + +static INLINE int +OUT_RESRCh(struct nouveau_channel *chan, struct nv04_resource *res, + unsigned delta, unsigned flags) +{ + return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags); +} + +static INLINE int +OUT_RESRCl(struct nouveau_channel *chan, struct nv04_resource *res, + unsigned delta, unsigned flags) +{ + if (flags & NOUVEAU_BO_WR) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); +} + +static INLINE void +BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned s) +{ + struct nouveau_subchannel *subc = &gr->channel->subc[s]; + + assert(s < 8); + if (subc->gr) { + 
assert(subc->gr->bound != NOUVEAU_GROBJ_BOUND_EXPLICIT); + subc->gr->bound = NOUVEAU_GROBJ_UNBOUND; + } + subc->gr = gr; + subc->gr->subc = s; + subc->gr->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT; + + BEGIN_RING(chan, RING_GR(gr, 0x0000), 1); + OUT_RING (chan, gr->handle); +} + +#endif diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile new file mode 100644 index 0000000000..e1cd188eec --- /dev/null +++ b/src/gallium/drivers/nvc0/Makefile @@ -0,0 +1,34 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nvc0 + +C_SOURCES = \ + nvc0_context.c \ + nvc0_draw.c \ + nvc0_formats.c \ + nvc0_miptree.c \ + nvc0_resource.c \ + nvc0_screen.c \ + nvc0_state.c \ + nvc0_state_validate.c \ + nvc0_surface.c \ + nvc0_tex.c \ + nvc0_transfer.c \ + nvc0_vbo.c \ + nvc0_program.c \ + nvc0_shader_state.c \ + nvc0_pc.c \ + nvc0_pc_print.c \ + nvc0_pc_emit.c \ + nvc0_tgsi_to_nc.c \ + nvc0_pc_optimize.c \ + nvc0_pc_regalloc.c \ + nvc0_push.c \ + nvc0_push2.c \ + nvc0_query.c + +LIBRARY_INCLUDES = \ + $(LIBDRM_CFLAGS) + +include ../../Makefile.template diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript new file mode 100644 index 0000000000..dbbbf663b3 --- /dev/null +++ b/src/gallium/drivers/nvc0/SConscript @@ -0,0 +1,33 @@ +Import('*') + +env = env.Clone() + +nvc0 = env.ConvenienceLibrary( + target = 'nvc0', + source = [ + 'nvc0_context.c', + 'nvc0_draw.c', + 'nvc0_formats.c', + 'nvc0_miptree.c', + 'nvc0_resource.c', + 'nvc0_screen.c', + 'nvc0_state.c', + 'nvc0_state_validate.c', + 'nvc0_surface.c', + 'nvc0_tex.c', + 'nvc0_transfer.c', + 'nvc0_vbo.c', + 'nvc0_program.c', + 'nvc0_shader_state.c', + 'nvc0_pc.c', + 'nvc0_pc_print.c', + 'nvc0_pc_emit.c', + 'nvc0_tgsi_to_nc.c', + 'nvc0_pc_optimize.c', + 'nvc0_pc_regalloc.c', + 'nvc0_push.c', + 'nvc0_push2.c', + 'nvc0_query.c' + ]) + +Export('nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nvc0/nvc0_2d.xml.h new file mode 100644 index 
0000000000..aebcd510e8 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_2d.xml.h @@ -0,0 +1,380 @@ +#ifndef NVC0_2D_XML +#define NVC0_2D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24) +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- 
Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NVC0_2D_DST_FORMAT 0x00000200 + +#define NVC0_2D_DST_LINEAR 0x00000204 + +#define NVC0_2D_DST_TILE_MODE 0x00000208 + +#define NVC0_2D_DST_DEPTH 0x0000020c + +#define NVC0_2D_DST_LAYER 0x00000210 + +#define NVC0_2D_DST_PITCH 0x00000214 + +#define NVC0_2D_DST_WIDTH 0x00000218 + +#define NVC0_2D_DST_HEIGHT 0x0000021c + +#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220 + +#define NVC0_2D_DST_ADDRESS_LOW 0x00000224 + +#define NVC0_2D_UNK228 0x00000228 + +#define NVC0_2D_SRC_FORMAT 0x00000230 + +#define NVC0_2D_SRC_LINEAR 0x00000234 + +#define NVC0_2D_SRC_TILE_MODE 0x00000238 + +#define NVC0_2D_SRC_DEPTH 0x0000023c + +#define NVC0_2D_SRC_LAYER 0x00000240 + +#define NVC0_2D_SRC_PITCH 0x00000244 +#define NVC0_2D_SRC_PITCH__MAX 0x00040000 + +#define NVC0_2D_SRC_WIDTH 0x00000248 +#define NVC0_2D_SRC_WIDTH__MAX 0x00010000 + +#define NVC0_2D_SRC_HEIGHT 0x0000024c +#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000 + +#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250 + +#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254 + +#define NVC0_2D_UNK258 0x00000258 + +#define NVC0_2D_UNK260 0x00000260 + +#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264 + +#define NVC0_2D_COND_ADDRESS_LOW 0x00000268 + +#define NVC0_2D_COND_MODE 0x0000026c +#define NVC0_2D_COND_MODE_NEVER 0x00000000 +#define NVC0_2D_COND_MODE_ALWAYS 0x00000001 +#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NVC0_2D_COND_MODE_EQUAL 0x00000003 +#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_2D_CLIP_X 0x00000280 + +#define NVC0_2D_CLIP_Y 0x00000284 + +#define NVC0_2D_CLIP_W 0x00000288 + +#define NVC0_2D_CLIP_H 0x0000028c + +#define NVC0_2D_CLIP_ENABLE 0x00000290 + +#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000 +#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001 +#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002 +#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003 +#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005 
+#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006 + +#define NVC0_2D_COLOR_KEY 0x00000298 + +#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c + +#define NVC0_2D_ROP 0x000002a0 + +#define NVC0_2D_BETA1 0x000002a4 + +#define NVC0_2D_BETA4 0x000002a8 + +#define NVC0_2D_OPERATION 0x000002ac +#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000 +#define NVC0_2D_OPERATION_ROP_AND 0x00000001 +#define NVC0_2D_OPERATION_BLEND_AND 0x00000002 +#define NVC0_2D_OPERATION_SRCCOPY 0x00000003 +#define NVC0_2D_OPERATION_UNK4 0x00000004 +#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005 +#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006 + +#define NVC0_2D_UNK2B0 0x000002b0 +#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f +#define NVC0_2D_UNK2B0_UNK0__SHIFT 0 +#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00 +#define NVC0_2D_UNK2B0_UNK1__SHIFT 8 + +#define NVC0_2D_PATTERN_SELECT 0x000002b4 +#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000 +#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001 +#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002 +#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003 + +#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8 +#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000 +#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001 +#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002 +#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003 +#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004 +#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005 + +#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec +#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000 +#define NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001 + +#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0)) +#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002 + +#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0)) +#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002 + +#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0)) 
+#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040 +#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff +#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0 +#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00 +#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8 +#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000 +#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16 + +#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0)) +#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020 +#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f +#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0 +#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0 +#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5 +#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800 +#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11 +#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000 +#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16 +#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000 +#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21 +#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000 +#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27 + +#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0)) +#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020 +#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f +#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26 + +#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0)) +#define NVC0_2D_PATTERN_Y8__ESIZE 
0x00000004 +#define NVC0_2D_PATTERN_Y8__LEN 0x00000010 +#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff +#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0 +#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00 +#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8 +#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000 +#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16 +#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000 +#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24 + +#define NVC0_2D_DRAW_SHAPE 0x00000580 +#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000 +#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001 +#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002 +#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003 +#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004 + +#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584 + +#define NVC0_2D_DRAW_COLOR 0x00000588 + +#define NVC0_2D_UNK58C 0x0000058c +#define NVC0_2D_UNK58C_0 0x00000001 +#define NVC0_2D_UNK58C_1 0x00000010 +#define NVC0_2D_UNK58C_2 0x00000100 +#define NVC0_2D_UNK58C_3 0x00001000 + +#define NVC0_2D_DRAW_POINT16 0x000005e0 +#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff +#define NVC0_2D_DRAW_POINT16_X__SHIFT 0 +#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000 +#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16 + +#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0)) +#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008 +#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040 + +#define NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0)) +#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008 +#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040 + +#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800 + +#define NVC0_2D_SIFC_FORMAT 0x00000804 + +#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002 + +#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c + +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000 +#define 
NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002 + +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814 + +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818 + +#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c + +#define NVC0_2D_SIFC_WIDTH 0x00000838 + +#define NVC0_2D_SIFC_HEIGHT 0x0000083c + +#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840 + +#define NVC0_2D_SIFC_DX_DU_INT 0x00000844 + +#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848 + +#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c + +#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850 + +#define NVC0_2D_SIFC_DST_X_INT 0x00000854 + +#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858 + +#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c + +#define NVC0_2D_SIFC_DATA 0x00000860 + +#define NVC0_2D_UNK0870 0x00000870 + +#define NVC0_2D_UNK0880 0x00000880 + +#define NVC0_2D_UNK0884 0x00000884 + +#define NVC0_2D_UNK0888 0x00000888 + +#define NVC0_2D_BLIT_CONTROL 0x0000088c +#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001 +#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001 +#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010 +#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4 +#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000 +#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010 + +#define NVC0_2D_BLIT_DST_X 0x000008b0 + +#define NVC0_2D_BLIT_DST_Y 0x000008b4 + +#define NVC0_2D_BLIT_DST_W 0x000008b8 + +#define NVC0_2D_BLIT_DST_H 0x000008bc + +#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0 + +#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4 + +#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8 + +#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc + +#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0 + +#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4 + +#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8 + +#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc + + +#endif /* NVC0_2D_XML */ diff --git 
a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h new file mode 100644 index 0000000000..94fa081ad7 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -0,0 +1,1244 @@ +#ifndef NVC0_3D_XML +#define NVC0_3D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07) +- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40) +- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20) +- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28) +- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17) + +Copyright (C) 2006-2011 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the 
Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104 +#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108 +#define NVC0_3D_NOTIFY 0x0000010c + +#define NVC0_3D_SERIALIZE 0x00000110 + +#define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210 + +#define NVC0_3D_MEM_BARRIER 0x0000021c +#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001 +#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002 +#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004 +#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010 +#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100 +#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000 + +#define NVC0_3D_TESS_MODE 0x00000320 +#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f +#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 +#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000 +#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001 +#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002 +#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0 +#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4 +#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020 +#define NVC0_3D_TESS_MODE_CW 0x00000100 +#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200 + +#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0)) +#define 
NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004 +#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004 + +#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0)) +#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004 +#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002 + +#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c + +#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0)) +#define NVC0_3D_TFB__ESIZE 0x00000020 +#define NVC0_3D_TFB__LEN 0x00000004 + +#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0)) + +#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0)) + +#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0)) + +#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0)) + +#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0)) + +#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010 +#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004 + +#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0)) +#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010 +#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004 + +#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0)) +#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010 +#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004 + +#define NVC0_3D_TFB_ENABLE 0x00000744 + +#define NVC0_3D_LOCAL_BASE 0x0000077c + +#define NVC0_3D_LOCAL_ADDRESS_HIGH 0x00000790 + +#define NVC0_3D_LOCAL_ADDRESS_LOW 0x00000794 + +#define NVC0_3D_LOCAL_SIZE_HIGH 0x00000798 + +#define NVC0_3D_LOCAL_SIZE_LOW 0x0000079c + +#define NVC0_3D_RT(i0) (0x00000800 + 0x40*(i0)) +#define NVC0_3D_RT__ESIZE 0x00000040 +#define NVC0_3D_RT__LEN 0x00000008 + +#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0)) + +#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0)) + +#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0)) + +#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0)) + +#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0)) + +#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 
0x40*(i0)) +#define NVC0_3D_RT_TILE_MODE_X 0x00000001 +#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070 +#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4 +#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700 +#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8 +#define NVC0_3D_RT_TILE_MODE_LINEAR 0x00001000 +#define NVC0_3D_RT_TILE_MODE_UNK16 0x00010000 + +#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0)) +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 +#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000 + +#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0)) + +#define NVC0_3D_RT_BASE_LAYER(i0) (0x00000820 + 0x40*(i0)) + +#define NVC0_3D_RT_UNK14(i0) (0x00000824 + 0x40*(i0)) + +#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0)) +#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010 +#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010 +#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff +#define 
NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0 +#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16 + +#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0)) +#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010 +#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010 +#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0 +#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000 +#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16 + +#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0)) +#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010 +#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010 + +#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0)) +#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 +#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010 + +#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16 + +#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) +#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004 +#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16 + +#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0)) +#define 
NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004 +#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0 +#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 +#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16 + +#define NVC0_3D_COUNTER_ENABLE 0x00000d68 +#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001 +#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002 +#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004 +#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008 +#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010 +#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020 +#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040 +#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080 +#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100 +#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200 +#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400 +#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800 +#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000 +#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000 +#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000 +#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000 + +#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74 + +#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78 + +#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0)) +#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004 +#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004 + +#define NVC0_3D_CLEAR_DEPTH 0x00000d90 + +#define NVC0_3D_CLEAR_STENCIL 0x00000da0 + +#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4 + +#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 + +#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 + +#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 + +#define NVC0_3D_PATCH_VERTICES 0x00000dcc + +#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8 + +#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc + +#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010 +#define 
NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010 + +#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010 +#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010 +#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0 +#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16 + +#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54 + +#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58 + +#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c + +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84 + +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88 + +#define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0)) +#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004 +#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002 + +#define NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0)) +#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004 +#define NVC0_3D_MSAA_MASK__LEN 0x00000004 + +#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc + +#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0 + +#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0 + +#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4 + +#define NVC0_3D_ZETA_FORMAT 0x00000fe8 + +#define NVC0_3D_ZETA_TILE_MODE 0x00000fec + +#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0 + +#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0 + +#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000 
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16 +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 + +#define NVC0_3D_CLEAR_FLAGS 0x000010f8 +#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001 +#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010 +#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100 +#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000 + +#define NVC0_3D_VERTEX_ID 0x00001118 + +#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000 + +#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0)) +#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004 +#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004 + +#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0)) +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000003f +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0 +#define 
NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_2_10_10_10 0x06000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x78000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000 + +#define NVC0_3D_RT_CONTROL 0x0000121c +#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f +#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0 +#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070 +#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4 +#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380 +#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7 +#define 
NVC0_3D_RT_CONTROL_MAP2__MASK 0x00001c00 +#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10 +#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000 +#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13 +#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000 +#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16 +#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000 +#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19 +#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000 +#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22 +#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000 +#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25 + +#define NVC0_3D_ZETA_HORIZ 0x00001228 + +#define NVC0_3D_ZETA_VERT 0x0000122c + +#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230 +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0 +#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000 + +#define NVC0_3D_LINKED_TSC 0x00001234 + +#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c + +#define NVC0_3D_FP_RESULT_COUNT 0x00001298 + +#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc + +#define NVC0_3D_D3D_FILL_MODE 0x000012d0 +#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001 +#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 0x00000002 +#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003 + +#define NVC0_3D_SHADE_MODEL 0x000012d4 +#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00 +#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01 + +#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4 + +#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8 + +#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec + +#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0 + +#define NVC0_3D_VB_ELEMENT_U8 0x00001304 +#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff +#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0 +#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00 +#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8 +#define 
NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000 +#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16 +#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000 +#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24 + +#define NVC0_3D_D3D_CULL_MODE 0x00001308 +#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001 +#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002 +#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003 + +#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c +#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200 +#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201 +#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202 +#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_ALPHA_TEST_REF 0x00001310 + +#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314 +#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318 +#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001 +#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff + +#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0)) +#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004 +#define NVC0_3D_BLEND_COLOR__LEN 0x00000004 + +#define NVC0_3D_TSC_FLUSH 0x00001330 +#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_3D_TIC_FLUSH 0x00001334 +#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4 + +#define 
NVC0_3D_TEX_CACHE_CTL 0x00001338 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344 + +#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348 + +#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350 + +#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358 + +#define NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) +#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004 +#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008 + +#define NVC0_3D_STENCIL_ENABLE 0x00001380 + +#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 
0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394 + +#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398 + +#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c + +#define NVC0_3D_DRAW_TFB_BASE 0x000013a4 + +#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000 + +#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac +#define 
NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001 +#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010 + +#define NVC0_3D_LINE_WIDTH 0x000013b0 + +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400 + +#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c + +#define NVC0_3D_VB_ELEMENT_BASE 0x00001434 + +#define NVC0_3D_VB_INSTANCE_BASE 0x00001438 + +#define NVC0_3D_CODE_CB_FLUSH 0x00001440 + +#define NVC0_3D_CLIPID_HEIGHT 0x00001504 +#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000 + +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16 + +#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16 + +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_2 0x00000004 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_3 0x00000008 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_4 0x00000010 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_5 0x00000020 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_6 0x00000040 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_7 0x00000080 + +#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514 + +#define NVC0_3D_POINT_SIZE 0x00001518 + +#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520 + +#define NVC0_3D_COUNTER_RESET 0x00001530 +#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001 +#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002 +#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003 +#define 
NVC0_3D_COUNTER_RESET_UNK04 0x00000004 +#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010 +#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011 +#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012 +#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013 +#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015 +#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016 +#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017 +#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018 +#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a +#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b +#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c +#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d +#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e +#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f + +#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534 + +#define NVC0_3D_ZETA_ENABLE 0x00001538 + +#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001 +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010 + +#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550 + +#define NVC0_3D_COND_ADDRESS_LOW 0x00001554 + +#define NVC0_3D_COND_MODE 0x00001558 +#define NVC0_3D_COND_MODE_NEVER 0x00000000 +#define NVC0_3D_COND_MODE_ALWAYS 0x00000001 +#define NVC0_3D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NVC0_3D_COND_MODE_EQUAL 0x00000003 +#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_3D_TSC_ADDRESS_HIGH 0x0000155c + +#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560 +#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NVC0_3D_TSC_LIMIT 0x00001564 +#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff + +#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c + +#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570 + +#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574 + +#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578 + +#define NVC0_3D_TIC_LIMIT 0x0000157c + +#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594 + +#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define 
NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_CSAA_ENABLE 0x000015b4 + +#define NVC0_3D_FRAMEBUFFER_SRGB 
0x000015b8 + +#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc + +#define NVC0_3D_LAYER 0x000015cc +#define NVC0_3D_LAYER_IDX__MASK 0x0000ffff +#define NVC0_3D_LAYER_IDX__SHIFT 0 +#define NVC0_3D_LAYER_USE_GP 0x00010000 + +#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0 +#define NVC0_3D_MULTISAMPLE_MODE_1X 0x00000000 +#define NVC0_3D_MULTISAMPLE_MODE_2XMS 0x00000001 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS 0x00000002 +#define NVC0_3D_MULTISAMPLE_MODE_8XMS 0x00000003 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009 +#define NVC0_3D_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a + +#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000 + +#define NVC0_3D_VERTEX_END_D3D 0x000015d8 +#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001 +#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002 + +#define NVC0_3D_EDGEFLAG_ENABLE 0x000015e4 + +#define NVC0_3D_VB_ELEMENT_U32 0x000015e8 + +#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0 + 
+#define NVC0_3D_VB_ELEMENT_U16 0x000015f0 +#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff +#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0 +#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000 +#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16 + +#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4 + +#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8 + +#define NVC0_3D_POINT_COORD_REPLACE 0x00001604 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3 + +#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608 + +#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c + +#define NVC0_3D_VERTEX_END_GL 0x00001614 +#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001 +#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002 + +#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 
0x0000000c +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000 +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000 + +#define NVC0_3D_VERTEX_DATA 0x00001640 + +#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644 + +#define NVC0_3D_PRIM_RESTART_INDEX 0x00001648 + +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000 + +#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658 + +#define NVC0_3D_POINT_RASTER_RULES 0x0000165c +#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000 +#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001 + +#define NVC0_3D_TEX_MISC 0x00001664 +#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 + +#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c + +#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680 + +#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684 + +#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688 + +#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c + +#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0)) +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004 +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020 + +#define NVC0_3D_ZETA_BASE_LAYER 0x0000179c + +#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_HIGH 0x000017bc + +#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_LOW 0x000017c0 + +#define NVC0_3D_VERTEX_QUARANTINE_SIZE 0x000017c4 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_16K 0x00000001 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_32K 0x00000002 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_64K 0x00000003 + +#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0)) +#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004 +#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004 + 
+#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc + +#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0 + +#define NVC0_3D_UNK17BC_LIMIT 0x000017c4 + +#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8 + +#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc + +#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0 + +#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4 + +#define NVC0_3D_INDEX_LOG2_SIZE 0x000017d8 + +#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc + +#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0 + +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0)) +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004 +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020 + +#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910 + +#define NVC0_3D_CULL_FACE_ENABLE 0x00001918 + +#define NVC0_3D_FRONT_FACE 0x0000191c +#define NVC0_3D_FRONT_FACE_CW 0x00000900 +#define NVC0_3D_FRONT_FACE_CCW 0x00000901 + +#define NVC0_3D_CULL_FACE 0x00001920 +#define NVC0_3D_CULL_FACE_FRONT 0x00000404 +#define NVC0_3D_CULL_FACE_BACK 0x00000405 +#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 + +#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c + +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12 +#define 
NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000 + +#define NVC0_3D_CLIP_RECTS_EN 0x0000194c + +#define NVC0_3D_CLIP_RECTS_MODE 0x00001950 +#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000 +#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001 +#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002 + +#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c +#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001 +#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010 + +#define NVC0_3D_CLIPID_ENABLE 0x0000197c + +#define NVC0_3D_CLIPID_WIDTH 0x00001980 +#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000 +#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040 + +#define NVC0_3D_CLIPID_ID 0x00001984 + +#define NVC0_3D_FP_CONTROL 0x000019a8 +#define NVC0_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001 +#define NVC0_3D_FP_CONTROL_EXPORTS_Z 0x00000100 +#define NVC0_3D_FP_CONTROL_USES_KIL 0x00100000 + +#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc + +#define NVC0_3D_LOGIC_OP_ENABLE 0x000019c4 + +#define NVC0_3D_LOGIC_OP 0x000019c8 +#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500 +#define NVC0_3D_LOGIC_OP_AND 0x00001501 +#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502 +#define NVC0_3D_LOGIC_OP_COPY 0x00001503 +#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504 +#define NVC0_3D_LOGIC_OP_NOOP 0x00001505 +#define NVC0_3D_LOGIC_OP_XOR 0x00001506 +#define NVC0_3D_LOGIC_OP_OR 0x00001507 +#define NVC0_3D_LOGIC_OP_NOR 0x00001508 +#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509 +#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a +#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b +#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d +#define NVC0_3D_LOGIC_OP_NAND 0x0000150e +#define NVC0_3D_LOGIC_OP_SET 0x0000150f + +#define NVC0_3D_CLEAR_BUFFERS 0x000019d0 +#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001 +#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002 +#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004 +#define 
NVC0_3D_CLEAR_BUFFERS_G 0x00000008 +#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010 +#define NVC0_3D_CLEAR_BUFFERS_A 0x00000020 +#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0 +#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 + +#define NVC0_3D_CLIPID_FILL 0x000019d4 + +#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) +#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004 +#define NVC0_3D_COLOR_MASK__LEN 0x00000008 +#define NVC0_3D_COLOR_MASK_R 0x0000000f +#define NVC0_3D_COLOR_MASK_G 0x000000f0 +#define NVC0_3D_COLOR_MASK_B 0x00000f00 +#define NVC0_3D_COLOR_MASK_A 0x0000f000 + +#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04 + +#define NVC0_3D_QUERY_SEQUENCE 0x00001b08 + +#define NVC0_3D_QUERY_GET 0x00001b0c +#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003 +#define NVC0_3D_QUERY_GET_MODE__SHIFT 0 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000 +#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002 +#define NVC0_3D_QUERY_GET_FENCE 0x00000010 +#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0 +#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5 +#define NVC0_3D_QUERY_GET_UNK8 0x00000100 +#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000 +#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12 +#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000 +#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16 +#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000 +#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000 +#define NVC0_3D_QUERY_GET_INTR 0x00100000 +#define NVC0_3D_QUERY_GET_UNK21 0x00200000 +#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000 +#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23 +#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000 +#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000 +#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000 +#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000 +#define 
NVC0_3D_QUERY_GET_SHORT 0x10000000 + +#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020 +#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff +#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0 +#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000 + +#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020 + +#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020 + +#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020 + +#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0)) +#define NVC0_3D_IBLEND__ESIZE 0x00000020 +#define NVC0_3D_IBLEND__LEN 0x00000008 + +#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0)) +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007 +#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008 +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0)) + +#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0)) + +#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0)) +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 
0x20*(i0)) + +#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0)) + +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0)) +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020 + +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0)) +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020 + +#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0)) +#define NVC0_3D_SP__ESIZE 0x00000040 +#define NVC0_3D_SP__LEN 0x00000006 + +#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0)) +#define NVC0_3D_SP_SELECT_ENABLE 0x00000001 +#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070 +#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010 +#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020 +#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030 +#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040 +#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050 + +#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0)) + +#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0)) + +#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0)) +#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010 +#define NVC0_3D_TEX_LIMITS__LEN 0x00000005 + +#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0)) +#define NVC0_3D_FIRMWARE__ESIZE 0x00000004 +#define NVC0_3D_FIRMWARE__LEN 0x00000020 + +#define NVC0_3D_CB_SIZE 0x00002380 + +#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384 + +#define NVC0_3D_CB_ADDRESS_LOW 0x00002388 + +#define NVC0_3D_CB_POS 0x0000238c + +#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0)) +#define NVC0_3D_CB_DATA__ESIZE 0x00000004 +#define NVC0_3D_CB_DATA__LEN 0x00000010 + +#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0)) +#define NVC0_3D_BIND_TSC__ESIZE 0x00000020 +#define NVC0_3D_BIND_TSC__LEN 0x00000005 +#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001 +#define 
NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0 +#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4 +#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000 +#define NVC0_3D_BIND_TSC_TSC__SHIFT 12 + +#define NVC0_3D_BIND_TIC(i0) (0x00002404 + 0x20*(i0)) +#define NVC0_3D_BIND_TIC__ESIZE 0x00000020 +#define NVC0_3D_BIND_TIC__LEN 0x00000005 +#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001 +#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1 +#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NVC0_3D_BIND_TIC_TIC__SHIFT 9 + +#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0)) +#define NVC0_3D_CB_BIND__ESIZE 0x00000020 +#define NVC0_3D_CB_BIND__LEN 0x00000005 +#define NVC0_3D_CB_BIND_VALID 0x00000001 +#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0 +#define NVC0_3D_CB_BIND_INDEX__SHIFT 4 + +#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 + +#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1)) +#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 +#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 + +#define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808 + +#define NVC0_3D_VERTEX_ARRAY_SELECT 0x00003820 + +#define NVC0_3D_BLEND_ENABLES 0x00003858 + +#define NVC0_3D_POLYGON_MODE_FRONT 0x00003868 +#define NVC0_3D_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NVC0_3D_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NVC0_3D_POLYGON_MODE_FRONT_FILL 0x00001b02 + +#define NVC0_3D_POLYGON_MODE_BACK 0x00003870 +#define NVC0_3D_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NVC0_3D_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NVC0_3D_POLYGON_MODE_BACK_FILL 0x00001b02 + +#define NVC0_3D_GP_SELECT 0x00003878 + +#define NVC0_3D_TEP_SELECT 0x00003880 + + +#endif /* NVC0_3D_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h new file mode 100644 index 0000000000..84b152213a --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h @@ -0,0 +1,98 @@ +#ifndef NV_3DDEFS_XML +#define NV_3DDEFS_XML + +/* 
Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38) +- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28) +- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes 
<hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000 +#define NV50_3D_BLEND_FACTOR_ONE 0x00004001 +#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303 +#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305 +#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308 +#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002 +#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 +#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901 +#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903 + +#endif /* NV_3DDEFS_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c new file mode 100644 index 0000000000..5d2168e600 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -0,0 +1,208 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" + +#include "nvc0_context.h" +#include "nvc0_screen.h" +#include "nvc0_resource.h" + +#include "nouveau/nouveau_reloc.h" + +static void +nvc0_flush(struct pipe_context *pipe, + struct pipe_fence_handle **fence) +{ + struct nouveau_screen *screen = &nvc0_context(pipe)->screen->base; + + if (fence) + nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence); + + /* Try to emit before firing to avoid having to flush again right after + * in case we have to wait on this fence. 
+ */ + nouveau_fence_emit(screen->fence.current); + + FIRE_RING(screen->channel); +} + +static void +nvc0_context_unreference_resources(struct nvc0_context *nvc0) +{ + unsigned s, i; + + for (i = 0; i < NVC0_BUFCTX_COUNT; ++i) + nvc0_bufctx_reset(nvc0, i); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL); + + pipe_resource_reference(&nvc0->idxbuf.buffer, NULL); + + for (s = 0; s < 5; ++s) { + for (i = 0; i < nvc0->num_textures[s]; ++i) + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); + + for (i = 0; i < 16; ++i) + pipe_resource_reference(&nvc0->constbuf[s][i], NULL); + } + + for (i = 0; i < nvc0->num_tfbbufs; ++i) + pipe_resource_reference(&nvc0->tfbbuf[i], NULL); +} + +static void +nvc0_destroy(struct pipe_context *pipe) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0_context_unreference_resources(nvc0); + + draw_destroy(nvc0->draw); + + if (nvc0->screen->cur_ctx == nvc0) { + nvc0->screen->base.channel->user_private = NULL; + nvc0->screen->cur_ctx = NULL; + } + + FREE(nvc0); +} + +void +nvc0_default_flush_notify(struct nouveau_channel *chan) +{ + struct nvc0_context *nvc0 = chan->user_private; + + if (!nvc0) + return; + + nouveau_fence_update(&nvc0->screen->base, TRUE); + nouveau_fence_next(&nvc0->screen->base); +} + +struct pipe_context * +nvc0_create(struct pipe_screen *pscreen, void *priv) +{ + struct pipe_winsys *pipe_winsys = pscreen->winsys; + struct nvc0_screen *screen = nvc0_screen(pscreen); + struct nvc0_context *nvc0; + struct pipe_context *pipe; + + nvc0 = CALLOC_STRUCT(nvc0_context); + if (!nvc0) + return NULL; + pipe = &nvc0->base.pipe; + + nvc0->screen = screen; + nvc0->base.screen = &screen->base; + nvc0->base.copy_data = nvc0_m2mf_copy_linear; + nvc0->base.push_data = nvc0_m2mf_push_linear; + + pipe->winsys = pipe_winsys; + pipe->screen = pscreen; + pipe->priv = priv; + + pipe->destroy = nvc0_destroy; + + pipe->draw_vbo = nvc0_draw_vbo; + pipe->clear = nvc0_clear; + + 
pipe->flush = nvc0_flush; + + if (!screen->cur_ctx) + screen->cur_ctx = nvc0; + screen->base.channel->user_private = nvc0; + screen->base.channel->flush_notify = nvc0_default_flush_notify; + + nvc0_init_query_functions(nvc0); + nvc0_init_surface_functions(nvc0); + nvc0_init_state_functions(nvc0); + nvc0_init_resource_functions(pipe); + + nvc0->draw = draw_create(pipe); + assert(nvc0->draw); + draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0)); + + return pipe; +} + +struct resident { + struct nv04_resource *res; + uint32_t flags; +}; + +void +nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx, + struct nv04_resource *resource, uint32_t flags) +{ + struct resident rsd = { resource, flags }; + + if (!resource->bo) + return; + + /* We don't need to reference the resource here, it will be referenced + * in the context/state, and bufctx will be reset when state changes. + */ + util_dynarray_append(&nvc0->residents[ctx], struct resident, rsd); +} + +void +nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx, + struct nv04_resource *resource) +{ + struct resident *rsd, *top; + unsigned i; + + for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) { + rsd = util_dynarray_element(&nvc0->residents[ctx], struct resident, i); + + if (rsd->res == resource) { + top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident); + if (rsd != top) + *rsd = *top; + break; + } + } +} + +void +nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) +{ + struct resident *rsd; + struct util_dynarray *array; + unsigned ctx, i, n; + + for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) { + array = &nvc0->residents[ctx]; + + n = array->size / sizeof(struct resident); + MARK_RING(nvc0->screen->base.channel, n, n); + for (i = 0; i < n; ++i) { + rsd = util_dynarray_element(array, struct resident, i); + + nvc0_resource_validate(rsd->res, rsd->flags); + } + } + + nvc0_screen_make_buffers_resident(nvc0->screen); +} diff --git 
a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h new file mode 100644 index 0000000000..102997e4fc --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -0,0 +1,244 @@ +#ifndef __NVC0_CONTEXT_H__ +#define __NVC0_CONTEXT_H__ + +#include <stdio.h> +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_inlines.h" +#include "util/u_dynarray.h" + +#include "draw/draw_vertex.h" + +#include "nvc0_winsys.h" +#include "nvc0_stateobj.h" +#include "nvc0_screen.h" +#include "nvc0_program.h" +#include "nvc0_resource.h" + +#include "nouveau/nouveau_context.h" + +#include "nvc0_3ddefs.xml.h" +#include "nvc0_3d.xml.h" +#include "nvc0_2d.xml.h" +#include "nvc0_m2mf.xml.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); + +#ifdef NOUVEAU_DEBUG +# define NOUVEAU_DBG(args...) printf(args); +#else +# define NOUVEAU_DBG(args...) 
+#endif + +#define NVC0_NEW_BLEND (1 << 0) +#define NVC0_NEW_RASTERIZER (1 << 1) +#define NVC0_NEW_ZSA (1 << 2) +#define NVC0_NEW_VERTPROG (1 << 3) +#define NVC0_NEW_TCTLPROG (1 << 4) +#define NVC0_NEW_TEVLPROG (1 << 5) +#define NVC0_NEW_GMTYPROG (1 << 6) +#define NVC0_NEW_FRAGPROG (1 << 7) +#define NVC0_NEW_BLEND_COLOUR (1 << 8) +#define NVC0_NEW_STENCIL_REF (1 << 9) +#define NVC0_NEW_CLIP (1 << 10) +#define NVC0_NEW_SAMPLE_MASK (1 << 11) +#define NVC0_NEW_FRAMEBUFFER (1 << 12) +#define NVC0_NEW_STIPPLE (1 << 13) +#define NVC0_NEW_SCISSOR (1 << 14) +#define NVC0_NEW_VIEWPORT (1 << 15) +#define NVC0_NEW_ARRAYS (1 << 16) +#define NVC0_NEW_VERTEX (1 << 17) +#define NVC0_NEW_CONSTBUF (1 << 18) +#define NVC0_NEW_TEXTURES (1 << 19) +#define NVC0_NEW_SAMPLERS (1 << 20) +#define NVC0_NEW_TFB (1 << 21) +#define NVC0_NEW_TFB_BUFFERS (1 << 22) + +#define NVC0_BUFCTX_CONSTANT 0 +#define NVC0_BUFCTX_FRAME 1 +#define NVC0_BUFCTX_VERTEX 2 +#define NVC0_BUFCTX_TEXTURES 3 +#define NVC0_BUFCTX_COUNT 4 + +struct nvc0_context { + struct nouveau_context base; + + struct nvc0_screen *screen; + + struct util_dynarray residents[NVC0_BUFCTX_COUNT]; + + uint32_t dirty; + + struct { + uint32_t instance_elts; /* bitmask of per-instance elements */ + uint32_t instance_base; + int32_t index_bias; + boolean prim_restart; + boolean early_z; + uint8_t num_vtxbufs; + uint8_t num_vtxelts; + uint8_t num_textures[5]; + uint8_t num_samplers[5]; + uint8_t tls_required; /* bitmask of shader types using l[] */ + uint16_t scissor; + uint32_t uniform_buffer_bound[5]; + } state; + + struct nvc0_blend_stateobj *blend; + struct nvc0_rasterizer_stateobj *rast; + struct nvc0_zsa_stateobj *zsa; + struct nvc0_vertex_stateobj *vertex; + + struct nvc0_program *vertprog; + struct nvc0_program *tctlprog; + struct nvc0_program *tevlprog; + struct nvc0_program *gmtyprog; + struct nvc0_program *fragprog; + + struct pipe_resource *constbuf[5][16]; + uint16_t constbuf_dirty[5]; + + struct pipe_vertex_buffer 
vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned num_vtxbufs; + struct pipe_index_buffer idxbuf; + uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ + uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ + unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */ + unsigned vbo_max_index; + + struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS]; + unsigned num_textures[5]; + struct nv50_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS]; + unsigned num_samplers[5]; + + struct pipe_framebuffer_state framebuffer; + struct pipe_blend_color blend_colour; + struct pipe_stencil_ref stencil_ref; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + + unsigned sample_mask; + + boolean vbo_push_hint; + + struct nvc0_transform_feedback_state *tfb; + struct pipe_resource *tfbbuf[4]; + unsigned num_tfbbufs; + unsigned tfb_offset[4]; + + struct draw_context *draw; +}; + +#define NVC0_USING_EDGEFLAG(ctx) \ + ((ctx)->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS) + +static INLINE struct nvc0_context * +nvc0_context(struct pipe_context *pipe) +{ + return (struct nvc0_context *)pipe; +} + +struct nvc0_surface { + struct pipe_surface base; + uint32_t offset; + uint32_t width; + uint16_t height; + uint16_t depth; +}; + +static INLINE struct nvc0_surface * +nvc0_surface(struct pipe_surface *ps) +{ + return (struct nvc0_surface *)ps; +} + +/* nvc0_context.c */ +struct pipe_context *nvc0_create(struct pipe_screen *, void *); + +void nvc0_default_flush_notify(struct nouveau_channel *); + +void nvc0_bufctx_emit_relocs(struct nvc0_context *); +void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx, + struct nv04_resource *, uint32_t flags); +void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx, + struct nv04_resource *); +static INLINE void +nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx) +{ + util_dynarray_resize(&nvc0->residents[ctx], 0); +} + 
+/* nvc0_draw.c */ +extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); + +/* nvc0_program.c */ +boolean nvc0_program_translate(struct nvc0_program *); +void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); + +/* nvc0_query.c */ +void nvc0_init_query_functions(struct nvc0_context *); + +/* nvc0_shader_state.c */ +void nvc0_vertprog_validate(struct nvc0_context *); +void nvc0_tctlprog_validate(struct nvc0_context *); +void nvc0_tevlprog_validate(struct nvc0_context *); +void nvc0_gmtyprog_validate(struct nvc0_context *); +void nvc0_fragprog_validate(struct nvc0_context *); + +void nvc0_tfb_validate(struct nvc0_context *); + +/* nvc0_state.c */ +extern void nvc0_init_state_functions(struct nvc0_context *); + +/* nvc0_state_validate.c */ +extern boolean nvc0_state_validate(struct nvc0_context *); + +/* nvc0_surface.c */ +extern void nvc0_clear(struct pipe_context *, unsigned buffers, + const float *rgba, double depth, unsigned stencil); +extern void nvc0_init_surface_functions(struct nvc0_context *); + +/* nvc0_tex.c */ +void nvc0_validate_textures(struct nvc0_context *); +void nvc0_validate_samplers(struct nvc0_context *); + +struct pipe_sampler_view * +nvc0_create_sampler_view(struct pipe_context *, + struct pipe_resource *, + const struct pipe_sampler_view *); + +/* nvc0_transfer.c */ +void +nvc0_m2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, void *data); +void +nvc0_m2mf_copy_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size); + +/* nvc0_vbo.c */ +void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); + +void * +nvc0_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements); +void +nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso); + +void 
nvc0_vertex_arrays_validate(struct nvc0_context *nvc0); + +/* nvc0_push.c */ +void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); +void nvc0_push_vbo2(struct nvc0_context *, const struct pipe_draw_info *); + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_draw.c b/src/gallium/drivers/nvc0/nvc0_draw.c new file mode 100644 index 0000000000..ac7e9f66a1 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_draw.c @@ -0,0 +1,88 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "draw/draw_pipe.h" + +#include "nvc0_context.h" + +struct nvc0_render_stage { + struct draw_stage stage; + struct nvc0_context *nvc0; +}; + +static INLINE struct nvc0_render_stage * +nvc0_render_stage(struct draw_stage *stage) +{ + return (struct nvc0_render_stage *)stage; +} + +static void +nvc0_render_point(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_line(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_flush(struct draw_stage *stage, unsigned flags) +{ +} + +static void +nvc0_render_reset_stipple_counter(struct draw_stage *stage) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_destroy(struct draw_stage *stage) +{ + FREE(stage); +} + +struct draw_stage * +nvc0_draw_render_stage(struct nvc0_context *nvc0) +{ + struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage); + + rs->nvc0 = nvc0; + rs->stage.draw = nvc0->draw; + rs->stage.destroy = nvc0_render_destroy; + rs->stage.point = nvc0_render_point; + rs->stage.line = nvc0_render_line; + rs->stage.tri = nvc0_render_tri; + rs->stage.flush = nvc0_render_flush; + rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter; + + return &rs->stage; +} diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c new file mode 100644 index 0000000000..454c744063 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_formats.c @@ -0,0 +1,523 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the 
Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_screen.h" +#include "nvc0_3d.xml.h" +#include "nv50/nv50_defs.xml.h" +#include "nv50/nv50_texture.xml.h" +#include "pipe/p_defines.h" + +#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \ + (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \ + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ + (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \ + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ + (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \ + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ + (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \ + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ + NV50_TIC_0_FMT_##sz, \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##sz | \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t0 | \ + (r << 31) + +#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \ + (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \ + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ + (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \ + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ + (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \ + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ + (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \ + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ + NV50_TIC_0_FMT_##sz, 0 + 
+#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER +#define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW +#define RENDER_TARGET PIPE_BIND_RENDER_TARGET +#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL +#define SCANOUT PIPE_BIND_SCANOUT + +/* for vertex buffers: */ +#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8 +#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16 +#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32 + +const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = +{ + /* COMMON FORMATS */ + + [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM, + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + + [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM, + A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + + [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB, + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB, + A_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM, + B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1), + SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + + [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM, + B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), + SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + + [PIPE_FORMAT_B5G5R5X1_UNORM] = { 0, + B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), + SAMPLER_VIEW | SCANOUT }, + + [PIPE_FORMAT_B4G4R4A4_UNORM] = { 0, + B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), + SAMPLER_VIEW }, + + [PIPE_FORMAT_B4G4R4X4_UNORM] = { 0, + B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 
4_4_4_4, 1), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0), + SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT }, + + [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM, + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1), + SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER }, + + /* DEPTH/STENCIL FORMATS */ + + [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM, + B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z16, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM, + B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, S8Z24, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM, + B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, X8Z24, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_Z24S8_UNORM, + B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, Z24S8, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, Z32, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = { + NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, UINT, UINT, UINT, X24S8Z32, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + /* LUMINANCE, ALPHA, INTENSITY */ + + [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_I16_UNORM] = { 
NV50_SURFACE_FORMAT_R16_UNORM, + A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 16, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM, + A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_A16_UNORM] = { 0, + A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L8A8_UNORM] = { 0, + A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L8A8_SRGB] = { 0, + A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L16A16_UNORM] = { 0, + A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 16_16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L4A4_UNORM] = { 0, + B_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 4_4, 0), + SAMPLER_VIEW }, + + /* DXT, RGTC */ + + [PIPE_FORMAT_DXT1_RGB] = { 0, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT1_RGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT3_RGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT5_RGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT1_SRGB] = { 0, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT1_SRGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT3_SRGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT5_SRGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_RGTC1_UNORM] = { 0, + B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_RGTC1_SNORM] = { 0, + B_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, 
SNORM, RGTC1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_RGTC2_UNORM] = { 0, + B_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC2, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_RGTC2_SNORM] = { 0, + B_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, RGTC2, 0), + SAMPLER_VIEW }, + + /* FLOAT 16 */ + + [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT, + A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT, + A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT, + A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT, + A_(C0, ZERO, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* FLOAT 32 */ + + [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT, + A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT, + A_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT, + A_(C0, C1, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT, + A_(C0, ZERO, ZERO, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* ODD FORMATS */ + + [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT, + B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, 
FLOAT, 10_11_11, 0), + SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER }, + + [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0, + B_(C0, C1, C2, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0), + SAMPLER_VIEW }, + + /* SNORM 32 */ + + [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0, + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 32_32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32B32_SNORM] = { 0, + A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32_SNORM] = { 0, + A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32_SNORM] = { 0, + A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + /* UNORM 32 */ + + [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 32_32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32B32_UNORM] = { 0, + A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32_UNORM] = { 0, + A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32_UNORM] = { 0, + A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + /* SNORM 16 */ + + [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM, + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16G16B16_SNORM] = { 0, + A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM, + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, + A_(C0, ZERO, ZERO, 
ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* UNORM 16 */ + + [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16G16B16_UNORM] = { 0, + A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, + A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* SNORM 8 */ + + [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM, + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8G8B8_SNORM] = { 0, + A_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM, + A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM, + A_(C0, ZERO, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* UNORM 8 */ + + [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM, + A_(C0, C1, C2, ONE_FLOAT, UNORM, 
UNORM, UNORM, UNORM, 8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB, + A_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM, + A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* SSCALED 32 (not integer, converted to float on fetch !) */ + + [PIPE_FORMAT_R32G32B32A32_SSCALED] = { 0, + A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R32G32B32_SSCALED] = { 0, + A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R32G32_SSCALED] = { 0, + A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R32_SSCALED] = { 0, + A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0), + VERTEX_BUFFER }, + + /* USCALED 32 */ + + [PIPE_FORMAT_R32G32B32A32_USCALED] = { 0, + A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R32G32B32_USCALED] = { 0, + A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R32G32_USCALED] = { 0, + A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32_32, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R32_USCALED] = { 0, + A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 32, 0), + VERTEX_BUFFER }, + + /* SSCALED 16 */ + + [PIPE_FORMAT_R16G16B16A16_SSCALED] = { 0, + A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R16G16B16_SSCALED] = { 0, + 
A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R16G16_SSCALED] = { 0, + A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R16_SSCALED] = { 0, + A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0), + VERTEX_BUFFER }, + + /* USCALED 16 */ + + [PIPE_FORMAT_R16G16B16A16_USCALED] = { 0, + A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R16G16B16_USCALED] = { 0, + A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R16G16_USCALED] = { 0, + A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16_16, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R16_USCALED] = { 0, + A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 16, 0), + VERTEX_BUFFER }, + + /* SSCALED 8 */ + + [PIPE_FORMAT_R8G8B8A8_SSCALED] = { 0, + A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R8G8B8_SSCALED] = { 0, + A_(C0, C1, C2, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R8G8_SSCALED] = { 0, + A_(C0, C1, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R8_SSCALED] = { 0, + A_(C0, ZERO, ZERO, ONE_FLOAT, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0), + VERTEX_BUFFER }, + + /* USCALED 8 */ + + [PIPE_FORMAT_R8G8B8A8_USCALED] = { 0, + A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R8G8B8_USCALED] = { 0, + A_(C0, C1, C2, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R8G8_USCALED] = { 0, + A_(C0, C1, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8_8, 0), + VERTEX_BUFFER }, + + [PIPE_FORMAT_R8_USCALED] = { 0, + A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 
8, 0), + VERTEX_BUFFER }, + + /* OTHER FORMATS */ + + [PIPE_FORMAT_R8G8_B8G8_UNORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C1_C2_C1_C0, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_G8R8_G8B8_UNORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C2_C1_C0_C1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R8SG8SB8UX8U_NORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R5SG5SB6U_NORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 6_5_5, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R1_UNORM] = { 0, + B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, BITMAP_8X8, 0), + SAMPLER_VIEW }, +}; diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h new file mode 100644 index 0000000000..b7d0d3eafa --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h @@ -0,0 +1,235 @@ + +#ifndef __NVC0_PGRAPH_MACROS_H__ +#define __NVC0_PGRAPH_MACROS_H__ + +/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1 + * with bits [src:src+size) in r2 + * + * bra(n)z annul: no delay slot + */ + +/* The comments above the macros describe what they *should* be doing, + * but we use less functionality for now. 
+ */ + +/* + * for (i = 0; i < 8; ++i) + * [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg); + * + * [3428] = arg; + * + * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0) + * [0d9c] = 0; + * else + * [0d9c] = [342c]; + */ +static const uint32_t nvc0_9097_blend_enables[] = +{ + 0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */ + 0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */ + 0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */ + 0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */ + 0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */ + 0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */ + 0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */ + 0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */ + 0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */ +}; + +/* + * uint64 limit = (parm(0) << 32) | parm(1); + * uint64 start = (parm(2) << 32); + * + * if (limit) { + * start |= parm(3); + * --limit; + * } else { + * start |= 1; + * } + * + * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff; + * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff; + * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff; + * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff; + */ +static const uint32_t nvc0_9097_vertex_array_select[] = +{ + 0x00000201, /* 0x00: parm $r2 */ + 0x00000301, /* 0x01: parm $r3 */ + 0x00000401, /* 0x02: parm $r4 */ + 0x00000501, /* 0x03: parm $r5 */ + 0x11004612, /* 0x04: mov $r6 extrinsrt 0 $r1 0 4 2 */ + 0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */ + 0x05c07621, /* 0x06: maddr $r6 add $6 0x1701 */ + 0x00002041, /* 0x07: send $r4 */ + 0x00002841, /* 0x08: send $r5 */ + 0x05f03f21, /* 0x09: maddr $r7 add $7 0x17c0 */ + 0x000010c1, /* 0x0a: exit send $r2 */ + 0x00001841, /* 0x0b: send $r3 */ +}; + +static const uint32_t nvc0_9097_color_mask_brdc[] = +{ + 0x05a00021, /* maddr [NVC0_3D_COLOR_MASK(0), increment = 4] */ + 0x00000841, /* send $r1 */ + 0x00000841, /* send $r1 */ + 0x00000841, /* send $r1 
*/ + 0x00000841, /* send $r1 */ + 0x00000841, /* send $r1 */ + 0x00000841, /* send $r1 */ + 0x000008c1, /* exit send $r1 */ + 0x00000841, /* send $r1 */ +}; + +/* + * [GL_POLYGON_MODE_FRONT] = arg; + * + * if (BIT(31 of [0x3410])) + * [1a24] = 0x7353; + * + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41) + * [02ec] = 0; + * else + * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE) + * [02ec] = BYTE(1 of [0x3410]) << 4; + * else + * [02ec] = BYTE(0 of [0x3410]) << 4; + */ +static const uint32_t nvc0_9097_poly_mode_front[] = +{ + 0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */ + 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */ + 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */ + 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x00dac021, /* 0x08: maddr 0x36b */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr [02ec] */ + 0x00003041 /* 0x10: send $r6 */ +}; + +/* + * [GL_POLYGON_MODE_BACK] = arg; + * + * if (BIT(31 of [0x3410])) + * [1a24] = 0x7353; + * + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41) + * [02ec] = 0; + * else + * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE) + * [02ec] = BYTE(1 of [0x3410]) << 4; + * else + * [02ec] = BYTE(0 of [0x3410]) << 4; + */ +/* NOTE: 0x3410 = 0x80002006 by default, + * POLYGON_MODE == GL_LINE check replaced by (MODE & 1) + * SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1 + */ +static const uint32_t nvc0_9097_poly_mode_back[] = +{ + 0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */ + 0x020c0315, /* 0x01: read $r3 
[NVC0_3D_SP_SELECT(3)] */ + 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */ + 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x00db0021, /* 0x08: maddr 0x36c */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr [02ec] */ + 0x00003041 /* 0x10: send $r6 */ +}; + +/* + * [NVC0_3D_SP_SELECT(4)] = arg + * + * if BIT(31 of [0x3410]) == 0 + * [1a24] = 0x7353; + * + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41) + * [02ec] = 0 + * else + * if (any POLYGON MODE == LINE) + * [02ec] = BYTE(1 of [3410]) << 4; + * else + * [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1 + */ +static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */ +{ + 0x00dac215, /* 0x00: read $r2 0x36b */ + 0x00db0315, /* 0x01: read $r3 0x36c */ + 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */ + 0x020c0415, /* 0x03: read $r4 0x830 */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x02100021, /* 0x08: maddr 0x840 */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr 0xbb */ + 0x00003041, /* 0x10: send $r6 */ +}; + +/* + * [NVC0_3D_SP_SELECT(3)] = arg + * + * if BIT(31 of [0x3410]) == 0 + * [1a24] = 0x7353; + * + * if (arg == 0x31) { + * if (BIT(2 of [0x3430])) { + * int i = 15; do { --i; } while(i); + * [0x1a2c] = 0; + * } + * } + * + * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || 
arg == 0x31) + * [02ec] = 0 + * else + * if ([any POLYGON_MODE] == GL_LINE) + * [02ec] = BYTE(1 of [3410]) << 4; + * else + * [02ec] = BYTE(0 of [3410]) << 4; + */ +static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */ +{ + 0x00dac215, /* 0x00: read $r2 0x36b */ + 0x00db0315, /* 0x01: read $r3 0x36c */ + 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */ + 0x02100415, /* 0x03: read $r4 0x840 */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x020c0021, /* 0x08: maddr 0x830 */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr 0xbb */ + 0x00003041, /* 0x10: send $r6 */ +}; + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h new file mode 100644 index 0000000000..3bf628d425 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h @@ -0,0 +1,138 @@ +#ifndef NVC0_M2MF_XML +#define NVC0_M2MF_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24) +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <arthur.huillet@free.fr> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<koala_br@users.sourceforge.net> (koala_br) +- Carlos Martin <carlosmn@users.sf.net> (carlosmn) +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) +- Dawid Gajownik <gajownik@users.sf.net> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) +- EdB <edb_@users.sf.net> (edb_) +- Erik Waling <erikwailing@users.sf.net> (erikwaling) +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez) +- imirkin <imirkin@users.sf.net> (imirkin) +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) +- Laurent Carlier <lordheavym@gmail.com> (lordheavy) +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) +- Maarten Maathuis <madman2003@gmail.com> (stillunknown) +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) +- Mark Carey <mark.carey@gmail.com> (careym) +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c) +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman) +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) +- Peter Popov <ironpeter@users.sf.net> (ironpeter) +- Richard Hughes <hughsient@users.sf.net> (hughsient) +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier) +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) +- sturmflut <sturmflut@users.sf.net> (sturmflut) +- Sylvain Munaut <tnt@246tNt.com> +- Victor Stinner <victor.stinner@haypocalc.com> (haypo) +- Wladmir van der Laan <laanwj@gmail.com> (miathan6) +- Younes Manton <younes.m@gmail.com> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons 
to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_M2MF_TILING_MODE_IN 0x00000204 + +#define NVC0_M2MF_TILING_PITCH_IN 0x00000208 + +#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c + +#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210 + +#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214 + +#define NVC0_M2MF_TILING_MODE_OUT 0x00000220 + +#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224 + +#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228 + +#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c + +#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230 + +#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238 + +#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c + +#define NVC0_M2MF_EXEC 0x00000300 +#define NVC0_M2MF_EXEC_PUSH 0x00000001 +#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010 +#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100 +#define NVC0_M2MF_EXEC_NOTIFY 0x00002000 +#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000 +#define NVC0_M2MF_EXEC_INC__SHIFT 20 + +#define NVC0_M2MF_DATA 0x00000304 + +#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c + +#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310 + +#define NVC0_M2MF_PITCH_IN 0x00000314 + +#define NVC0_M2MF_PITCH_OUT 0x00000318 + +#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c + +#define NVC0_M2MF_LINE_COUNT 0x00000320 + +#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c + +#define 
NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330 + +#define NVC0_M2MF_NOTIFY 0x00000334 + +#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344 + +#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348 + +#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c + +#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350 + + +#endif /* NVC0_M2MF_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c new file mode 100644 index 0000000000..bced324552 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -0,0 +1,319 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" +#include "nvc0_transfer.h" + +static INLINE uint32_t +get_tile_dims(unsigned nx, unsigned ny, unsigned nz) +{ + uint32_t tile_mode = 0x000; + + if (ny > 64) tile_mode = 0x040; /* height 128 tiles */ + else + if (ny > 32) tile_mode = 0x030; /* height 64 tiles */ + else + if (ny > 16) tile_mode = 0x020; /* height 32 tiles */ + else + if (ny > 8) tile_mode = 0x010; /* height 16 tiles */ + + if (nz == 1) + return tile_mode; + else + if (tile_mode > 0x020) + tile_mode = 0x020; + + if (nz > 16 && tile_mode < 0x020) + return tile_mode | 0x500; /* depth 32 tiles */ + if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */ + if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */ + if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */ + + return tile_mode | 0x100; +} + +uint32_t +nvc0_miptree_zslice_offset(struct nvc0_miptree *mt, unsigned l, unsigned z) +{ + unsigned nblocksy; /* height of texture level aligned to tile height */ + + unsigned stride_2d; /* to next slice within a 3D tile */ + unsigned stride_3d; /* to slice in the next (in z direction !) 
3D tile */ + + unsigned tile_d_shift = NVC0_TILE_DIM_SHIFT(mt->level[l].tile_mode, 2); + unsigned tile_d = 1 << tile_d_shift; + + nblocksy = util_format_get_nblocksy(mt->base.base.format, + u_minify(mt->base.base.height0, l)); + + nblocksy = align(nblocksy, NVC0_TILE_HEIGHT(mt->level[l].tile_mode)); + + stride_2d = NVC0_TILE_SIZE_2D(mt->level[l].tile_mode); + + stride_3d = (nblocksy * mt->level[l].pitch) << tile_d_shift; + + return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d; +} + +static void +nvc0_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) +{ + struct nvc0_miptree *mt = nvc0_miptree(pt); + + nouveau_screen_bo_release(pscreen, mt->base.bo); + + FREE(mt); +} + +static boolean +nvc0_miptree_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *pt, + struct winsys_handle *whandle) +{ + struct nvc0_miptree *mt = nvc0_miptree(pt); + unsigned stride; + + if (!mt || !mt->base.bo) + return FALSE; + + stride = util_format_get_stride(mt->base.base.format, + mt->base.base.width0); + + return nouveau_screen_bo_get_handle(pscreen, + mt->base.bo, + stride, + whandle); +} + +const struct u_resource_vtbl nvc0_miptree_vtbl = +{ + nvc0_miptree_get_handle, /* get_handle */ + nvc0_miptree_destroy, /* resource_destroy */ + nvc0_miptree_transfer_new, /* get_transfer */ + nvc0_miptree_transfer_del, /* transfer_destroy */ + nvc0_miptree_transfer_map, /* transfer_map */ + u_default_transfer_flush_region, /* transfer_flush_region */ + nvc0_miptree_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) +{ + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree); + struct pipe_resource *pt = &mt->base.base; + int ret; + unsigned w, h, d, l, alloc_size; + uint32_t tile_flags; + + if (!mt) + return NULL; + + 
mt->base.vtbl = &nvc0_miptree_vtbl; + *pt = *templ; + pipe_reference_init(&pt->reference, 1); + pt->screen = pscreen; + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + w = pt->width0; + h = pt->height0; + d = mt->layout_3d ? pt->depth0 : 1; + + switch (pt->format) { + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x0700; /* COMPRESSED */ + tile_flags = 0x0100; /* NORMAL */ + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + tile_flags = 0x5300; /* MSAA 4, COMPRESSED */ + tile_flags = 0x4600; /* NORMAL */ + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + tile_flags = 0x1100; /* NORMAL */ + if (w * h >= 128 * 128 && 0) + tile_flags = 0x1700; /* COMPRESSED, requires magic */ + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + tile_flags = 0xf500; /* COMPRESSED */ + tile_flags = 0xf700; /* MSAA 2 */ + tile_flags = 0xf900; /* MSAA 4 */ + tile_flags = 0xfe00; /* NORMAL */ + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + tile_flags = 0xce00; /* COMPRESSED */ + tile_flags = 0xcf00; /* MSAA 2, COMPRESSED */ + tile_flags = 0xd000; /* MSAA 4, COMPRESSED */ + tile_flags = 0xc300; /* NORMAL */ + break; + case PIPE_FORMAT_R16G16B16A16_UNORM: + tile_flags = 0xe900; /* COMPRESSED */ + tile_flags = 0xfe00; /* NORMAL */ + break; + default: + tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */ + tile_flags = 0xfe00; /* NORMAL 32 BIT */ + if (w * h >= 128 * 128 && 0) + tile_flags = 0xdb00; /* COMPRESSED 32 BIT, requires magic */ + break; + } + + /* For 3D textures, a mipmap is spanned by all the layers, for array + * textures and cube maps, each layer contains its own mipmaps. 
+ */ + for (l = 0; l <= pt->last_level; ++l) { + struct nvc0_miptree_level *lvl = &mt->level[l]; + unsigned nbx = util_format_get_nblocksx(pt->format, w); + unsigned nby = util_format_get_nblocksy(pt->format, h); + unsigned blocksize = util_format_get_blocksize(pt->format); + + lvl->offset = mt->total_size; + lvl->tile_mode = get_tile_dims(nbx, nby, d); + lvl->pitch = align(nbx * blocksize, NVC0_TILE_PITCH(lvl->tile_mode)); + + mt->total_size += lvl->pitch * + align(nby, NVC0_TILE_HEIGHT(lvl->tile_mode)) * + align(d, NVC0_TILE_DEPTH(lvl->tile_mode)); + + w = u_minify(w, 1); + h = u_minify(h, 1); + d = u_minify(d, 1); + } + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, + NVC0_TILE_SIZE(mt->level[0].tile_mode)); + mt->total_size = mt->layer_stride * pt->array_size; + } + + alloc_size = mt->total_size; + if (tile_flags == 0x1700) + alloc_size *= 3; /* HiZ, XXX: correct size */ + + ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size, + mt->level[0].tile_mode, tile_flags, + &mt->base.bo); + if (ret) { + FREE(mt); + return NULL; + } + mt->base.domain = NOUVEAU_BO_VRAM; + + return pt; +} + +struct pipe_resource * +nvc0_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + struct nvc0_miptree *mt; + unsigned stride; + + /* only supports 2D, non-mipmapped textures for the moment */ + if ((templ->target != PIPE_TEXTURE_2D && + templ->target != PIPE_TEXTURE_RECT) || + templ->last_level != 0 || + templ->depth0 != 1 || + templ->array_size > 1) + return NULL; + + mt = CALLOC_STRUCT(nvc0_miptree); + if (!mt) + return NULL; + + mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); + if (mt->base.bo == NULL) { + FREE(mt); + return NULL; + } + + mt->base.base = *templ; + mt->base.vtbl = &nvc0_miptree_vtbl; + pipe_reference_init(&mt->base.base.reference, 1); + mt->base.base.screen = pscreen; + mt->level[0].pitch = stride; + mt->level[0].offset = 0; + 
mt->level[0].tile_mode = mt->base.bo->tile_mode; + + /* no need to adjust bo reference count */ + return &mt->base.base; +} + + +/* Surface functions. + */ + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ) +{ + struct nvc0_miptree *mt = nvc0_miptree(pt); /* guaranteed */ + struct nvc0_surface *ns; + struct pipe_surface *ps; + struct nvc0_miptree_level *lvl = &mt->level[templ->u.tex.level]; + + ns = CALLOC_STRUCT(nvc0_surface); + if (!ns) + return NULL; + ps = &ns->base; + + pipe_reference_init(&ps->reference, 1); + pipe_resource_reference(&ps->texture, pt); + ps->context = pipe; + ps->format = templ->format; + ps->usage = templ->usage; + ps->u.tex.level = templ->u.tex.level; + ps->u.tex.first_layer = templ->u.tex.first_layer; + ps->u.tex.last_layer = templ->u.tex.last_layer; + + ns->width = u_minify(pt->width0, ps->u.tex.level); + ns->height = u_minify(pt->height0, ps->u.tex.level); + ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; + ns->offset = lvl->offset; + + /* comment says there are going to be removed, but they're used by the st */ + ps->width = ns->width; + ps->height = ns->height; + + return ps; +} + +void +nvc0_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps) +{ + struct nvc0_surface *s = nvc0_surface(ps); + + pipe_resource_reference(&ps->texture, NULL); + + FREE(s); +} diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c new file mode 100644 index 0000000000..bd85a7f1ff --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -0,0 +1,716 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * 
and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define NOUVEAU_DEBUG 1
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+/* Remap the low three bits of a condition code through cc_swapped[], which
+ * exchanges bit 0 and bit 2 (1 <-> 4, 3 <-> 6); all higher bits of @cc are
+ * passed through unchanged.
+ * NOTE(review): presumably this yields the condition for swapped operands -
+ * confirm against the condition code encoding.
+ */
+uint8_t
+nvc0_ir_reverse_cc(uint8_t cc)
+{
+   static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+
+   return cc_swapped[cc & 7] | (cc & ~7);
+}
+
+/* Return TRUE if source slot @s of @nvi may take the operand of @ld directly.
+ *
+ * For a MOV of an immediate, the op's "immediate" mask must allow slot @s
+ * (only slots 0 and 1 qualify), and unless bit 2 of that mask is set the low
+ * 12 bits of the immediate must be zero. For any other @ld, the op's
+ * "memory" mask must allow slot @s. Indirect loads, loads from
+ * NV_FILE_MEM_L / NV_FILE_MEM_G, and instructions that already have an
+ * immediate among their first three sources are rejected.
+ */
+boolean
+nvc0_insn_can_load(struct nv_instruction *nvi, int s,
+                   struct nv_instruction *ld)
+{
+   int i;
+
+   if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) {
+      if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s)))
+         return FALSE;
+      if (!(nvc0_op_info_table[nvi->opcode].immediate & 4))
+         if (ld->src[0]->value->reg.imm.u32 & 0xfff)
+            return FALSE;
+   } else
+   if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s)))
+      return FALSE;
+
+   if (ld->indirect >= 0)
+      return FALSE;
+
+   /* a few ops can use g[] sources directly, but we don't support g[] yet */
+   if (ld->src[0]->value->reg.file == NV_FILE_MEM_L ||
+       ld->src[0]->value->reg.file == NV_FILE_MEM_G)
+      return FALSE;
+
+   for (i = 0; i < 3 && nvi->src[i]; ++i)
+      if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
+         return FALSE;
+
+   return TRUE;
+}
+
+/* Return whether this instruction can be executed conditionally. 
*/ +boolean +nvc0_insn_is_predicateable(struct nv_instruction *nvi) +{ + if (nvi->predicate >= 0) /* already predicated */ + return FALSE; + if (!nvc0_op_info_table[nvi->opcode].predicate && + !nvc0_op_info_table[nvi->opcode].pseudo) + return FALSE; + return TRUE; +} + +int +nvc0_insn_refcount(struct nv_instruction *nvi) +{ + int rc = 0; + int i; + for (i = 0; i < 5 && nvi->def[i]; ++i) { + if (!nvi->def[i]) + return rc; + rc += nvi->def[i]->refc; + } + return rc; +} + +int +nvc0_pc_replace_value(struct nv_pc *pc, + struct nv_value *old_val, + struct nv_value *new_val) +{ + int i, n, s; + + if (old_val == new_val) + return old_val->refc; + + for (i = 0, n = 0; i < pc->num_refs; ++i) { + if (pc->refs[i]->value == old_val) { + ++n; + for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s) + if (pc->refs[i]->insn->src[s] == pc->refs[i]) + break; + assert(s < 6); + nv_reference(pc, pc->refs[i]->insn, s, new_val); + } + } + return n; +} + +static INLINE boolean +is_gpr63(struct nv_value *val) +{ + return (val->reg.file == NV_FILE_GPR && val->reg.id == 63); +} + +struct nv_value * +nvc0_pc_find_constant(struct nv_ref *ref) +{ + struct nv_value *src; + + if (!ref) + return NULL; + + src = ref->value; + while (src->insn && src->insn->opcode == NV_OP_MOV) { + assert(!src->insn->src[0]->mod); + src = src->insn->src[0]->value; + } + if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) || + (src->insn && + src->insn->opcode == NV_OP_LD && + src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && + src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15))) + return src; + return NULL; +} + +struct nv_value * +nvc0_pc_find_immediate(struct nv_ref *ref) +{ + struct nv_value *src = nvc0_pc_find_constant(ref); + + return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? 
src : NULL; +} + +static void +nv_pc_free_refs(struct nv_pc *pc) +{ + int i; + for (i = 0; i < pc->num_refs; i += 64) + FREE(pc->refs[i]); + FREE(pc->refs); +} + +static const char * +edge_name(ubyte type) +{ + switch (type) { + case CFG_EDGE_FORWARD: return "forward"; + case CFG_EDGE_BACK: return "back"; + case CFG_EDGE_LOOP_ENTER: return "loop"; + case CFG_EDGE_LOOP_LEAVE: return "break"; + case CFG_EDGE_FAKE: return "fake"; + default: + return "?"; + } +} + +void +nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, + void *priv) +{ + struct nv_basic_block *bb[64], *bbb[16], *b; + int j, p, pp; + + bb[0] = root; + p = 1; + pp = 0; + + while (p > 0) { + b = bb[--p]; + b->priv = 0; + + for (j = 1; j >= 0; --j) { + if (!b->out[j]) + continue; + + switch (b->out_kind[j]) { + case CFG_EDGE_BACK: + continue; + case CFG_EDGE_FORWARD: + case CFG_EDGE_FAKE: + if (++b->out[j]->priv == b->out[j]->num_in) + bb[p++] = b->out[j]; + break; + case CFG_EDGE_LOOP_ENTER: + bb[p++] = b->out[j]; + break; + case CFG_EDGE_LOOP_LEAVE: + if (!b->out[j]->priv) { + bbb[pp++] = b->out[j]; + b->out[j]->priv = 1; + } + break; + default: + assert(0); + break; + } + } + + f(priv, b); + + if (!p) { + p = pp; + for (; pp > 0; --pp) + bb[pp - 1] = bbb[pp - 1]; + } + } +} + +static void +nv_do_print_function(void *priv, struct nv_basic_block *b) +{ + struct nv_instruction *i; + + debug_printf("=== BB %i ", b->id); + if (b->out[0]) + debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id); + if (b->out[1]) + debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id); + debug_printf("===\n"); + + i = b->phi; + if (!i) + i = b->entry; + for (; i; i = i->next) + nvc0_print_instruction(i); +} + +void +nvc0_print_function(struct nv_basic_block *root) +{ + if (root->subroutine) + debug_printf("SUBROUTINE %i\n", root->subroutine); + else + debug_printf("MAIN\n"); + + nvc0_pc_pass_in_order(root, nv_do_print_function, root); +} + +void +nvc0_print_program(struct 
nv_pc *pc) +{ + int i; + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i]) + nvc0_print_function(pc->root[i]); +} + +#if NOUVEAU_DEBUG > 1 +static void +nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) +{ + int i; + + b->pass_seq = pc->pass_seq; + + fprintf(f, "\t%i [shape=box]\n", b->id); + + for (i = 0; i < 2; ++i) { + if (!b->out[i]) + continue; + switch (b->out_kind[i]) { + case CFG_EDGE_FORWARD: + fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_LOOP_ENTER: + fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_LOOP_LEAVE: + fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_BACK: + fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); + continue; + case CFG_EDGE_FAKE: + fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id); + break; + default: + assert(0); + break; + } + if (b->out[i]->pass_seq < pc->pass_seq) + nv_do_print_cfgraph(pc, f, b->out[i]); + } +} + +/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. 
*/
+static void
+nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
+{
+   FILE *f;
+
+   /* opened in append mode: successive graphs accumulate in the same file */
+   f = fopen(filepath, "a");
+   if (!f)
+      return;
+
+   fprintf(f, "digraph G {\n");
+
+   ++pc->pass_seq;
+
+   nv_do_print_cfgraph(pc, f, pc->root[subr]);
+
+   fprintf(f, "}\n");
+
+   fclose(f);
+}
+#endif
+
+/* Dump the emitted code as 32-bit hex words, two words per instruction,
+ * with a line break after every 16 words.
+ */
+static INLINE void
+nvc0_pc_print_binary(struct nv_pc *pc)
+{
+   unsigned i;
+
+   NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8);
+
+   for (i = 0; i < pc->emit_size / 4; i += 2) {
+      debug_printf("0x%08x ", pc->emit[i + 0]);
+      debug_printf("0x%08x ", pc->emit[i + 1]);
+      /* i is always even, so the 16th word of a row is emit[i + 1]
+       * with i % 16 == 14
+       */
+      if ((i % 16) == 14)
+         debug_printf("\n");
+   }
+   debug_printf("\n");
+}
+
+/* Emit every instruction of every block in bb_list order into pc->emit.
+ *
+ * Each instruction advances the output by two words (8 bytes). Two more
+ * words (0x00001de7, 0x80000000) are written after the last instruction and
+ * emit_size grows by 8 to cover them; pc->emit is then reset to the start of
+ * the buffer. Always returns 0.
+ */
+static int
+nvc0_emit_program(struct nv_pc *pc)
+{
+   uint32_t *code = pc->emit;
+   int n;
+
+   NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
+
+   pc->emit_pos = 0;
+   for (n = 0; n < pc->num_blocks; ++n) {
+      struct nv_instruction *i;
+      struct nv_basic_block *b = pc->bb_list[n];
+
+      for (i = b->entry; i; i = i->next) {
+         nvc0_emit_instruction(pc, i);
+         pc->emit += 2;
+         pc->emit_pos += 8;
+      }
+   }
+   assert(pc->emit == &code[pc->emit_size / 4]);
+
+   pc->emit[0] = 0x00001de7;
+   pc->emit[1] = 0x80000000;
+   pc->emit_size += 8;
+
+   pc->emit = code;
+
+#ifdef NOUVEAU_DEBUG
+   nvc0_pc_print_binary(pc);
+#else
+   debug_printf("not printing binary\n");
+#endif
+   return 0;
+}
+
+/* Translate the TGSI program described by @ti into nvc0 machine code.
+ *
+ * Runs TGSI translation, pass0 (optimization), pass1 (register allocation),
+ * pass2 (preparation for emission) and finally emission. On success the
+ * code buffer, its size, the GPR count and the relocation table are stored
+ * in ti->prog and 0 is returned. On failure a non-zero value is returned
+ * and the code, immediate and relocation buffers are freed here.
+ */
+int
+nvc0_generate_code(struct nvc0_translation_info *ti)
+{
+   struct nv_pc *pc;
+   int ret;
+   int i;
+
+   pc = CALLOC_STRUCT(nv_pc);
+   if (!pc)
+      return 1;
+
+   pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT;
+
+   pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0]));
+   if (!pc->root) {
+      FREE(pc);
+      return 1;
+   }
+   pc->num_subroutines = ti->num_subrs;
+
+   ret = nvc0_tgsi_to_nc(pc, ti);
+   if (ret)
+      goto out;
+#if NOUVEAU_DEBUG > 1
+   nvc0_print_program(pc);
+#endif
+
+   pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE;
+
+   /* optimization */
+   ret = nvc0_pc_exec_pass0(pc);
+   if (ret)
+      goto out;
+#ifdef NOUVEAU_DEBUG
+   nvc0_print_program(pc);
+#endif
+
+   /* register allocation */
+   ret = nvc0_pc_exec_pass1(pc);
+   if (ret)
+      goto out;
+#if NOUVEAU_DEBUG > 1
+   nvc0_print_program(pc);
+   nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
+#endif
+
+   /* prepare for emission */
+   ret = nvc0_pc_exec_pass2(pc);
+   if (ret)
+      goto out;
+   assert(!(pc->emit_size % 8));
+
+   /* two extra words for the trailer written by nvc0_emit_program() */
+   pc->emit = CALLOC(pc->emit_size / 4 + 2, 4);
+   if (!pc->emit) {
+      ret = 3;
+      goto out;
+   }
+   ret = nvc0_emit_program(pc);
+   if (ret)
+      goto out;
+
+   ti->prog->code = pc->emit;
+   ti->prog->code_base = 0;
+   ti->prog->code_size = pc->emit_size;
+   ti->prog->parm_size = 0;
+
+   ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1);
+
+   ti->prog->relocs = pc->reloc_entries;
+   ti->prog->num_relocs = pc->num_relocs;
+
+out:
+   /* reported after the label so that failed translations are logged too */
+   NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
+
+   nv_pc_free_refs(pc);
+
+   for (i = 0; i < pc->num_blocks; ++i)
+      FREE(pc->bb_list[i]);
+   if (pc->root)
+      FREE(pc->root);
+   if (ret) {
+      /* on success, these will be referenced by struct nvc0_program */
+      if (pc->emit)
+         FREE(pc->emit);
+      if (pc->immd_buf)
+         FREE(pc->immd_buf);
+      if (pc->reloc_entries)
+         FREE(pc->reloc_entries);
+   }
+   FREE(pc);
+   return ret;
+}
+
+/* Link PHI instruction @i into block @b.
+ *
+ * The first PHI of a block becomes b->phi and is placed in front of
+ * b->entry. Later PHIs go after the entry if the entry is itself a PHI (the
+ * block then holds nothing else), or directly before the entry otherwise.
+ */
+static void
+nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
+{
+   if (!b->phi) {
+      i->prev = NULL;
+      b->phi = i;
+      i->next = b->entry;
+      if (b->entry) {
+         assert(!b->entry->prev && b->exit);
+         b->entry->prev = i;
+      } else {
+         b->entry = i;
+         b->exit = i;
+      }
+   } else {
+      assert(b->entry);
+      if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
+         assert(b->entry == b->exit);
+         b->entry->next = i;
+         i->prev = b->entry;
+         b->entry = i;
+         b->exit = i;
+      } else { /* insert before entry */
+         assert(b->entry->prev && b->exit);
+         i->next = b->entry;
+         i->prev = b->entry->prev;
+         b->entry->prev = i;
+         i->prev->next = i;
+      }
+   }
+}
+
+void
+nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i) +{ + if (i->opcode == NV_OP_PHI) { + nvbb_insert_phi(b, i); + } else { + i->prev = b->exit; + if (b->exit) + b->exit->next = i; + b->exit = i; + if (!b->entry) + b->entry = i; + else + if (i->prev && i->prev->opcode == NV_OP_PHI) + b->entry = i; + } + + i->bb = b; + b->num_instructions++; + + if (i->prev && i->prev->terminator) + nvc0_insns_permute(i->prev, i); +} + +void +nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni) +{ + if (!at->next) { + nvc0_insn_append(at->bb, ni); + return; + } + ni->next = at->next; + ni->prev = at; + ni->next->prev = ni; + ni->prev->next = ni; + ni->bb = at->bb; + ni->bb->num_instructions++; +} + +void +nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni) +{ + nvc0_insn_insert_after(at, ni); + nvc0_insns_permute(at, ni); +} + +void +nvc0_insn_delete(struct nv_instruction *nvi) +{ + struct nv_basic_block *b = nvi->bb; + int s; + + /* debug_printf("REM: "); nv_print_instruction(nvi); */ + + for (s = 0; s < 6 && nvi->src[s]; ++s) + nv_reference(NULL, nvi, s, NULL); + + if (nvi->next) + nvi->next->prev = nvi->prev; + else { + assert(nvi == b->exit); + b->exit = nvi->prev; + } + + if (nvi->prev) + nvi->prev->next = nvi->next; + + if (nvi == b->entry) { + /* PHIs don't get hooked to b->entry */ + b->entry = nvi->next; + assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI); + } + + if (nvi == b->phi) { + if (nvi->opcode != NV_OP_PHI) + NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n"); + + assert(!nvi->prev); + if (!nvi->next || nvi->next->opcode != NV_OP_PHI) + b->phi = NULL; + else + b->phi = nvi->next; + } +} + +void +nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2) +{ + struct nv_basic_block *b = i1->bb; + + assert(i1->opcode != NV_OP_PHI && + i2->opcode != NV_OP_PHI); + assert(i1->next == i2); + + if (b->exit == i2) + b->exit = i1; + + if (b->entry == i1) + b->entry = i2; + + i2->prev 
= i1->prev; + i1->next = i2->next; + i2->next = i1; + i1->prev = i2; + + if (i2->prev) + i2->prev->next = i2; + if (i1->next) + i1->next->prev = i1; +} + +void +nvc0_bblock_attach(struct nv_basic_block *parent, + struct nv_basic_block *b, ubyte edge_kind) +{ + assert(b->num_in < 8); + + if (parent->out[0]) { + assert(!parent->out[1]); + parent->out[1] = b; + parent->out_kind[1] = edge_kind; + } else { + parent->out[0] = b; + parent->out_kind[0] = edge_kind; + } + + b->in[b->num_in] = parent; + b->in_kind[b->num_in++] = edge_kind; +} + +/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */ + +boolean +nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d) +{ + int j; + + if (b == d) + return TRUE; + + for (j = 0; j < b->num_in; ++j) + if ((b->in_kind[j] != CFG_EDGE_BACK) && + !nvc0_bblock_dominated_by(b->in[j], d)) + return FALSE; + + return j ? TRUE : FALSE; +} + +/* check if @bf (future) can be reached from @bp (past), stop at @bt */ +boolean +nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, + struct nv_basic_block *bt) +{ + struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b; + int i, p, n; + + p = 0; + n = 1; + q[0] = bp; + + while (p < n) { + b = q[p++]; + + if (b == bf) + break; + if (b == bt) + continue; + assert(n <= (1024 - 2)); + + for (i = 0; i < 2; ++i) { + if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) { + q[n] = b->out[i]; + q[n++]->priv = 1; + } + } + } + for (--n; n >= 0; --n) + q[n]->priv = 0; + + return (b == bf); +} + +static struct nv_basic_block * +nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df) +{ + struct nv_basic_block *out; + int i; + + if (!nvc0_bblock_dominated_by(df, b)) { + for (i = 0; i < df->num_in; ++i) { + if (df->in_kind[i] == CFG_EDGE_BACK) + continue; + if (nvc0_bblock_dominated_by(df->in[i], b)) + return df; + } + } + for (i = 0; i < 2 && df->out[i]; ++i) { + if (df->out_kind[i] == CFG_EDGE_BACK) + 
continue; + if ((out = nvbb_find_dom_frontier(b, df->out[i]))) + return out; + } + return NULL; +} + +struct nv_basic_block * +nvc0_bblock_dom_frontier(struct nv_basic_block *b) +{ + struct nv_basic_block *df; + int i; + + for (i = 0; i < 2 && b->out[i]; ++i) + if ((df = nvbb_find_dom_frontier(b, b->out[i]))) + return df; + return NULL; +} diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h new file mode 100644 index 0000000000..3a5612a5fa --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -0,0 +1,650 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NVC0_COMPILER_H__ +#define __NVC0_COMPILER_H__ + +#include <stdio.h> + +#ifndef NOUVEAU_DBG +#ifdef NOUVEAU_DEBUG +# define NOUVEAU_DBG(args...) debug_printf(args); +#else +# define NOUVEAU_DBG(args...) +#endif +#endif + +#ifndef NOUVEAU_ERR +#define NOUVEAU_ERR(fmt, args...) 
\ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); +#endif + +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +/* pseudo opcodes */ +#define NV_OP_UNDEF 0 +#define NV_OP_BIND 1 +#define NV_OP_MERGE 2 +#define NV_OP_PHI 3 +#define NV_OP_SELECT 4 +#define NV_OP_NOP 5 + +/** + * BIND forces source operand i into the same register as destination operand i, + * and the operands will be assigned consecutive registers (needed for TEX). + * Beware conflicts ! + * SELECT forces its multiple source operands and its destination operand into + * one and the same register. + */ + +/* base opcodes */ +#define NV_OP_LD 6 +#define NV_OP_ST 7 +#define NV_OP_MOV 8 +#define NV_OP_AND 9 +#define NV_OP_OR 10 +#define NV_OP_XOR 11 +#define NV_OP_SHL 12 +#define NV_OP_SHR 13 +#define NV_OP_NOT 14 +#define NV_OP_SET 15 +#define NV_OP_ADD 16 +#define NV_OP_SUB 17 +#define NV_OP_MUL 18 +#define NV_OP_MAD 19 +#define NV_OP_ABS 20 +#define NV_OP_NEG 21 +#define NV_OP_MAX 22 +#define NV_OP_MIN 23 +#define NV_OP_CVT 24 +#define NV_OP_CEIL 25 +#define NV_OP_FLOOR 26 +#define NV_OP_TRUNC 27 +#define NV_OP_SAD 28 + +/* shader opcodes */ +#define NV_OP_VFETCH 29 +#define NV_OP_PFETCH 30 +#define NV_OP_EXPORT 31 +#define NV_OP_LINTERP 32 +#define NV_OP_PINTERP 33 +#define NV_OP_EMIT 34 +#define NV_OP_RESTART 35 +#define NV_OP_TEX 36 +#define NV_OP_TXB 37 +#define NV_OP_TXL 38 +#define NV_OP_TXF 39 +#define NV_OP_TXQ 40 +#define NV_OP_QUADOP 41 +#define NV_OP_DFDX 42 +#define NV_OP_DFDY 43 +#define NV_OP_KIL 44 + +/* control flow opcodes */ +#define NV_OP_BRA 45 +#define NV_OP_CALL 46 +#define NV_OP_RET 47 +#define NV_OP_EXIT 48 +#define NV_OP_BREAK 49 +#define NV_OP_BREAKADDR 50 +#define NV_OP_JOINAT 51 +#define NV_OP_JOIN 52 + +/* typed opcodes */ +#define NV_OP_ADD_F32 NV_OP_ADD +#define NV_OP_ADD_B32 53 +#define NV_OP_MUL_F32 NV_OP_MUL +#define NV_OP_MUL_B32 54 +#define NV_OP_ABS_F32 NV_OP_ABS +#define 
NV_OP_ABS_S32 55 +#define NV_OP_NEG_F32 NV_OP_NEG +#define NV_OP_NEG_S32 56 +#define NV_OP_MAX_F32 NV_OP_MAX +#define NV_OP_MAX_S32 57 +#define NV_OP_MAX_U32 58 +#define NV_OP_MIN_F32 NV_OP_MIN +#define NV_OP_MIN_S32 59 +#define NV_OP_MIN_U32 60 +#define NV_OP_SET_F32 61 +#define NV_OP_SET_S32 62 +#define NV_OP_SET_U32 63 +#define NV_OP_SAR 64 +#define NV_OP_RCP 65 +#define NV_OP_RSQ 66 +#define NV_OP_LG2 67 +#define NV_OP_SIN 68 +#define NV_OP_COS 69 +#define NV_OP_EX2 70 +#define NV_OP_PRESIN 71 +#define NV_OP_PREEX2 72 +#define NV_OP_SAT 73 + +/* newly added opcodes */ +#define NV_OP_SET_F32_AND 74 +#define NV_OP_SET_F32_OR 75 +#define NV_OP_SET_F32_XOR 76 +#define NV_OP_SELP 77 +#define NV_OP_SLCT 78 +#define NV_OP_SLCT_F32 NV_OP_SLCT +#define NV_OP_SLCT_S32 79 +#define NV_OP_SLCT_U32 80 +#define NV_OP_SUB_F32 NV_OP_SUB +#define NV_OP_SUB_S32 81 +#define NV_OP_MAD_F32 NV_OP_MAD +#define NV_OP_FSET_F32 82 +#define NV_OP_TXG 83 + +#define NV_OP_COUNT 84 + +/* nv50 files omitted */ +#define NV_FILE_GPR 0 +#define NV_FILE_COND 1 +#define NV_FILE_PRED 2 +#define NV_FILE_IMM 16 +#define NV_FILE_MEM_S 32 +#define NV_FILE_MEM_V 34 +#define NV_FILE_MEM_A 35 +#define NV_FILE_MEM_L 48 +#define NV_FILE_MEM_G 64 +#define NV_FILE_MEM_C(i) (80 + i) + +#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S) + +#define NV_MOD_NEG 1 +#define NV_MOD_ABS 2 +#define NV_MOD_NOT 4 +#define NV_MOD_SAT 8 + +#define NV_TYPE_U8 0x00 +#define NV_TYPE_S8 0x01 +#define NV_TYPE_U16 0x02 +#define NV_TYPE_S16 0x03 +#define NV_TYPE_U32 0x04 +#define NV_TYPE_S32 0x05 +#define NV_TYPE_P32 0x07 +#define NV_TYPE_F32 0x09 +#define NV_TYPE_F64 0x0b +#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | (n << 4)) +#define NV_TYPE_ANY 0xff + +#define NV_TYPE_ISINT(t) ((t) < 7) +#define NV_TYPE_ISSGD(t) ((t) & 1) + +#define NV_CC_FL 0x0 +#define NV_CC_LT 0x1 +#define NV_CC_EQ 0x2 +#define NV_CC_LE 0x3 +#define NV_CC_GT 0x4 +#define NV_CC_NE 0x5 +#define NV_CC_GE 0x6 +#define NV_CC_U 0x8 +#define NV_CC_TR 0xf +#define 
NV_CC_O 0x10 +#define NV_CC_C 0x11 +#define NV_CC_A 0x12 +#define NV_CC_S 0x13 +#define NV_CC_INVERSE(cc) ((cc) ^ 0x7) +/* for 1 bit predicates: */ +#define NV_CC_P 0 +#define NV_CC_NOT_P 1 + +uint8_t nvc0_ir_reverse_cc(uint8_t cc); + +#define NV_PC_MAX_INSTRUCTIONS 2048 +#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) + +#define NV_PC_MAX_BASIC_BLOCKS 1024 + +struct nv_op_info { + uint base; /* e.g. ADD_S32 -> ADD */ + char name[12]; + uint8_t type; + uint16_t mods; + unsigned flow : 1; + unsigned commutative : 1; + unsigned vector : 1; + unsigned predicate : 1; + unsigned pseudo : 1; + unsigned immediate : 3; + unsigned memory : 3; +}; + +extern struct nv_op_info nvc0_op_info_table[]; + +#define NV_BASEOP(op) (nvc0_op_info_table[op].base) +#define NV_OPTYPE(op) (nvc0_op_info_table[op].type) + +static INLINE boolean +nv_is_texture_op(uint opcode) +{ + return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ); +} + +static INLINE boolean +nv_is_vector_op(uint opcode) +{ + return nvc0_op_info_table[opcode].vector ? TRUE : FALSE; +} + +static INLINE boolean +nv_op_commutative(uint opcode) +{ + return nvc0_op_info_table[opcode].commutative ? 
TRUE : FALSE; +} + +static INLINE uint8_t +nv_op_supported_src_mods(uint opcode, int s) +{ + return (nvc0_op_info_table[opcode].mods >> (s * 4)) & 0xf; +} + +static INLINE uint +nv_type_order(ubyte type) +{ + switch (type & 0xf) { + case NV_TYPE_U8: + case NV_TYPE_S8: + return 0; + case NV_TYPE_U16: + case NV_TYPE_S16: + return 1; + case NV_TYPE_U32: + case NV_TYPE_F32: + case NV_TYPE_S32: + case NV_TYPE_P32: + return 2; + case NV_TYPE_F64: + return 3; + } + assert(0); + return 0; +} + +static INLINE uint +nv_type_sizeof(ubyte type) +{ + if (type & 0xf0) + return (1 << nv_type_order(type)) * (type >> 4); + return 1 << nv_type_order(type); +} + +static INLINE uint +nv_type_sizeof_base(ubyte type) +{ + return 1 << nv_type_order(type); +} + +struct nv_reg { + uint32_t address; /* for memory locations */ + int id; /* for registers */ + ubyte file; + ubyte size; + union { + int32_t s32; + int64_t s64; + uint64_t u64; + uint32_t u32; /* expected to be 0 for $r63 */ + float f32; + double f64; + } imm; +}; + +struct nv_range { + struct nv_range *next; + int bgn; + int end; +}; + +struct nv_ref; + +struct nv_value { + struct nv_reg reg; + struct nv_instruction *insn; + struct nv_value *join; + struct nv_ref *last_use; + int n; + struct nv_range *livei; + int refc; + struct nv_value *next; + struct nv_value *prev; +}; + +struct nv_ref { + struct nv_value *value; + struct nv_instruction *insn; + struct list_head list; /* connects uses of the same value */ + uint8_t mod; + uint8_t flags; +}; + +#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0) + +struct nv_basic_block; + +struct nv_instruction { + struct nv_instruction *next; + struct nv_instruction *prev; + uint opcode; + uint serial; + + struct nv_value *def[5]; + struct nv_ref *src[6]; + + int8_t predicate; /* index of predicate src */ + int8_t indirect; /* index of pointer src */ + + union { + struct { + uint8_t t; /* TIC binding */ + uint8_t s; /* TSC binding */ + } tex; + struct { + uint8_t d; /* output type */ + uint8_t s; /* 
input type */ + } cvt; + } ext; + + struct nv_basic_block *bb; + struct nv_basic_block *target; /* target block of control flow insn */ + + unsigned cc : 5; /* condition code */ + unsigned fixed : 1; /* don't optimize away (prematurely) */ + unsigned terminator : 1; + unsigned join : 1; + unsigned set_cond : 4; /* 2nd byte */ + unsigned saturate : 1; + unsigned centroid : 1; + unsigned flat : 1; + unsigned patch : 1; + unsigned lanes : 4; /* 3rd byte */ + unsigned tex_dim : 2; + unsigned tex_array : 1; + unsigned tex_cube : 1; + unsigned tex_shadow : 1; /* 4th byte */ + unsigned tex_live : 1; + unsigned tex_mask : 4; + + uint8_t quadop; +}; + +static INLINE int +nvi_vector_size(struct nv_instruction *nvi) +{ + int i; + assert(nvi); + for (i = 0; i < 5 && nvi->def[i]; ++i); + return i; +} + +#define CFG_EDGE_FORWARD 0 +#define CFG_EDGE_BACK 1 +#define CFG_EDGE_LOOP_ENTER 2 +#define CFG_EDGE_LOOP_LEAVE 4 +#define CFG_EDGE_FAKE 8 + +/* 'WALL' edge means where reachability check doesn't follow */ +/* 'LOOP' edge means just having to do with loops */ +#define IS_LOOP_EDGE(k) ((k) & 7) +#define IS_WALL_EDGE(k) ((k) & 9) + +struct nv_basic_block { + struct nv_instruction *entry; /* first non-phi instruction */ + struct nv_instruction *exit; + struct nv_instruction *phi; /* very first instruction */ + int num_instructions; + + struct nv_basic_block *out[2]; /* no indirect branches -> 2 */ + struct nv_basic_block *in[8]; /* hope that suffices */ + uint num_in; + ubyte out_kind[2]; + ubyte in_kind[8]; + + int id; + int subroutine; + uint priv; /* reset to 0 after you're done */ + uint pass_seq; + + uint32_t emit_pos; /* position, size in emitted code (in bytes) */ + uint32_t emit_size; + + uint32_t live_set[NV_PC_MAX_VALUES / 32]; +}; + +struct nvc0_translation_info; + +struct nv_pc { + struct nv_basic_block **root; + struct nv_basic_block *current_block; + struct nv_basic_block *parent_block; + + int loop_nesting_bound; + uint pass_seq; + + struct nv_value 
values[NV_PC_MAX_VALUES]; + struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS]; + struct nv_ref **refs; + struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS]; + int num_values; + int num_instructions; + int num_refs; + int num_blocks; + int num_subroutines; + + int max_reg[4]; + + uint32_t *immd_buf; /* populated on emit */ + unsigned immd_count; + + uint32_t *emit; + uint32_t emit_size; + uint32_t emit_pos; + + void *reloc_entries; + unsigned num_relocs; + + /* optimization enables */ + boolean opt_reload_elim; + boolean is_fragprog; +}; + +void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *); +void nvc0_insn_insert_before(struct nv_instruction *, struct nv_instruction *); +void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *); + +static INLINE struct nv_instruction * +nv_alloc_instruction(struct nv_pc *pc, uint opcode) +{ + struct nv_instruction *insn; + + insn = &pc->instructions[pc->num_instructions++]; + assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); + + insn->opcode = opcode; + insn->cc = NV_CC_P; + insn->indirect = -1; + insn->predicate = -1; + + return insn; +} + +static INLINE struct nv_instruction * +new_instruction(struct nv_pc *pc, uint opcode) +{ + struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); + + nvc0_insn_append(pc->current_block, insn); + return insn; +} + +static INLINE struct nv_instruction * +new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode) +{ + struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); + + nvc0_insn_insert_after(at, insn); + return insn; +} + +static INLINE struct nv_value * +new_value(struct nv_pc *pc, ubyte file, ubyte size) +{ + struct nv_value *value = &pc->values[pc->num_values]; + + assert(pc->num_values < NV_PC_MAX_VALUES - 1); + + value->n = pc->num_values++; + value->join = value; + value->reg.id = -1; + value->reg.file = file; + value->reg.size = size; + return value; +} + +static INLINE struct nv_value * 
+new_value_like(struct nv_pc *pc, struct nv_value *like) +{ + return new_value(pc, like->reg.file, like->reg.size); +} + +static INLINE struct nv_ref * +new_ref(struct nv_pc *pc, struct nv_value *val) +{ + int i; + struct nv_ref *ref; + + if ((pc->num_refs % 64) == 0) { + const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *); + const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *); + + pc->refs = REALLOC(pc->refs, old_size, new_size); + + ref = CALLOC(64, sizeof(struct nv_ref)); + for (i = 0; i < 64; ++i) + pc->refs[pc->num_refs + i] = &ref[i]; + } + + ref = pc->refs[pc->num_refs++]; + ref->value = val; + + LIST_INITHEAD(&ref->list); + + ++val->refc; + return ref; +} + +static INLINE struct nv_basic_block * +new_basic_block(struct nv_pc *pc) +{ + struct nv_basic_block *bb; + + if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS) + return NULL; + + bb = CALLOC_STRUCT(nv_basic_block); + + bb->id = pc->num_blocks; + pc->bb_list[pc->num_blocks++] = bb; + return bb; +} + +static INLINE void +nv_reference(struct nv_pc *pc, + struct nv_instruction *nvi, int c, struct nv_value *s) +{ + struct nv_ref **d = &nvi->src[c]; + assert(c < 6); + + if (*d) { + --(*d)->value->refc; + LIST_DEL(&(*d)->list); + } + + if (s) { + if (!*d) { + *d = new_ref(pc, s); + (*d)->insn = nvi; + } else { + LIST_DEL(&(*d)->list); + (*d)->value = s; + ++(s->refc); + } + if (!s->last_use) + s->last_use = *d; + else + LIST_ADDTAIL(&s->last_use->list, &(*d)->list); + + s->last_use = *d; + (*d)->insn = nvi; + } else { + *d = NULL; + } +} + +/* nvc0_emit.c */ +void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *); + +/* nvc0_print.c */ +const char *nvc0_opcode_name(uint opcode); +void nvc0_print_instruction(struct nv_instruction *); + +/* nvc0_pc.c */ +void nvc0_print_function(struct nv_basic_block *root); +void nvc0_print_program(struct nv_pc *); + +boolean nvc0_insn_can_load(struct nv_instruction *, int s, + struct nv_instruction *); +boolean 
nvc0_insn_is_predicateable(struct nv_instruction *); + +int nvc0_insn_refcount(struct nv_instruction *); +void nvc0_insn_delete(struct nv_instruction *); +void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *); + +void nvc0_bblock_attach(struct nv_basic_block *parent, + struct nv_basic_block *child, ubyte edge_kind); +boolean nvc0_bblock_dominated_by(struct nv_basic_block *, + struct nv_basic_block *); +boolean nvc0_bblock_reachable_by(struct nv_basic_block *future, + struct nv_basic_block *past, + struct nv_basic_block *final); +struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *); + +int nvc0_pc_replace_value(struct nv_pc *pc, + struct nv_value *old_val, + struct nv_value *new_val); + +struct nv_value *nvc0_pc_find_immediate(struct nv_ref *); +struct nv_value *nvc0_pc_find_constant(struct nv_ref *); + +typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b); + +void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *); + +int nvc0_pc_exec_pass0(struct nv_pc *pc); +int nvc0_pc_exec_pass1(struct nv_pc *pc); +int nvc0_pc_exec_pass2(struct nv_pc *pc); + +int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *); + +#endif // NV50_COMPILER_H diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c new file mode 100644 index 0000000000..76ad40dbcf --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -0,0 +1,1020 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +#define NVC0_FIXUP_CODE_RELOC 0 +#define NVC0_FIXUP_DATA_RELOC 1 + +struct nvc0_fixup { + uint8_t type; + int8_t shift; + uint32_t mask; + uint32_t data; + uint32_t ofst; +}; + +void +nvc0_relocate_program(struct nvc0_program *prog, + uint32_t code_base, + uint32_t data_base) +{ + struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs; + unsigned i; + + for (i = 0; i < prog->num_relocs; ++i) { + uint32_t data; + + switch (f[i].type) { + case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break; + case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break; + default: + data = f[i].data; + break; + } + data = (f[i].shift < 0) ? 
(data >> -f[i].shift) : (data << f[i].shift); + + prog->code[f[i].ofst / 4] &= ~f[i].mask; + prog->code[f[i].ofst / 4] |= data & f[i].mask; + } +} + +static void +create_fixup(struct nv_pc *pc, uint8_t ty, + int w, uint32_t data, uint32_t m, int s) +{ + struct nvc0_fixup *f; + + const unsigned size = sizeof(struct nvc0_fixup); + const unsigned n = pc->num_relocs; + + if (!(n % 8)) + pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size); + + f = (struct nvc0_fixup *)pc->reloc_entries; + + f[n].ofst = pc->emit_pos + w * 4; + f[n].type = ty; + f[n].data = data; + f[n].mask = m; + f[n].shift = s; + + ++pc->num_relocs; +} + +static INLINE ubyte +SSIZE(struct nv_instruction *nvi, int s) +{ + return nvi->src[s]->value->reg.size; +} + +static INLINE ubyte +DSIZE(struct nv_instruction *nvi, int d) +{ + return nvi->def[d]->reg.size; +} + +static INLINE struct nv_reg * +SREG(struct nv_ref *ref) +{ + if (!ref) + return NULL; + return &ref->value->join->reg; +} + +static INLINE struct nv_reg * +DREG(struct nv_value *val) +{ + if (!val) + return NULL; + return &val->join->reg; +} + +static INLINE ubyte +SFILE(struct nv_instruction *nvi, int s) +{ + return nvi->src[s]->value->reg.file; +} + +static INLINE ubyte +DFILE(struct nv_instruction *nvi, int d) +{ + return nvi->def[0]->reg.file; +} + +static INLINE void +SID(struct nv_pc *pc, struct nv_ref *ref, int pos) +{ + pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32); +} + +static INLINE void +DID(struct nv_pc *pc, struct nv_value *val, int pos) +{ + pc->emit[pos / 32] |= (DREG(val) ? 
DREG(val)->id : 63) << (pos % 32); +} + +static INLINE uint32_t +get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */ +{ + assert(ref->value->reg.file == NV_FILE_IMM); + return ref->value->reg.imm.u32; +} + +static INLINE void +set_immd_u32_l(struct nv_pc *pc, uint32_t u32) +{ + pc->emit[0] |= (u32 & 0x3f) << 26; + pc->emit[1] |= u32 >> 6; +} + +static INLINE void +set_immd_u32(struct nv_pc *pc, uint32_t u32) +{ + if ((pc->emit[0] & 0xf) == 0x2) { + set_immd_u32_l(pc, u32); + } else + if ((pc->emit[0] & 0xf) == 0x3) { + assert(!(pc->emit[1] & 0xc000)); + pc->emit[1] |= 0xc000; + assert(!(u32 & 0xfff00000)); + set_immd_u32_l(pc, u32); + } else { + assert(!(pc->emit[1] & 0xc000)); + pc->emit[1] |= 0xc000; + assert(!(u32 & 0xfff)); + set_immd_u32_l(pc, u32 >> 12); + } +} + +static INLINE void +set_immd(struct nv_pc *pc, struct nv_instruction *i, int s) +{ + set_immd_u32(pc, get_immd_u32(i->src[s])); +} + +static INLINE void +DVS(struct nv_pc *pc, struct nv_instruction *i) +{ + uint s = i->def[0]->reg.size; + int n; + for (n = 1; n < 4 && i->def[n]; ++n) + s += i->def[n]->reg.size; + pc->emit[0] |= ((s / 4) - 1) << 5; +} + +static INLINE void +SVS(struct nv_pc *pc, struct nv_ref *src) +{ + pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5; +} + +static void +set_pred(struct nv_pc *pc, struct nv_instruction *i) +{ + if (i->predicate >= 0) { + SID(pc, i->src[i->predicate], 6); + if (i->cc) + pc->emit[0] |= 0x2000; /* negate */ + } else { + pc->emit[0] |= 0x1c00; + } +} + +static INLINE void +set_address_16(struct nv_pc *pc, struct nv_ref *src) +{ + pc->emit[0] |= (src->value->reg.address & 0x003f) << 26; + pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6; +} + +static INLINE unsigned +const_space_index(struct nv_instruction *i, int s) +{ + return SFILE(i, s) - NV_FILE_MEM_C(0); +} + +static void +emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op) +{ + pc->emit[0] = 0x00000007; + pc->emit[1] = op << 24; + + if (op == 0x40 || (op >= 0x80 && op <= 
0x98)) { + /* bra, exit, ret or kil */ + pc->emit[0] |= 0x1e0; + set_pred(pc, i); + } + + if (i->target) { + int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8); + + /* we will need relocations only for global functions */ + /* + create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000); + create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff); + */ + + pc->emit[0] |= (pcrel & 0x3f) << 26; + pc->emit[1] |= (pcrel >> 6) & 0x3ffff; + } +} + +/* doesn't work for vfetch, export, ld, st, mov ... */ +static void +emit_form_0(struct nv_pc *pc, struct nv_instruction *i) +{ + int s; + + set_pred(pc, i); + + DID(pc, i->def[0], 14); + + for (s = 0; s < 3 && i->src[s]; ++s) { + if (SFILE(i, s) >= NV_FILE_MEM_C(0) && + SFILE(i, s) <= NV_FILE_MEM_C(15)) { + assert(!(pc->emit[1] & 0xc000)); + assert(s <= 1); + pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10); + set_address_16(pc, i->src[s]); + } else + if (SFILE(i, s) == NV_FILE_GPR) { + SID(pc, i->src[s], s ? ((s == 2) ? 49 : 26) : 20); + } else + if (SFILE(i, s) == NV_FILE_IMM) { + assert(!(pc->emit[1] & 0xc000)); + assert(s == 1 || i->opcode == NV_OP_MOV); + set_immd(pc, i, s); + } + } +} + +static void +emit_form_1(struct nv_pc *pc, struct nv_instruction *i) +{ + int s; + + set_pred(pc, i); + + DID(pc, i->def[0], 14); + + for (s = 0; s < 1 && i->src[s]; ++s) { + if (SFILE(i, s) >= NV_FILE_MEM_C(0) && + SFILE(i, s) <= NV_FILE_MEM_C(15)) { + assert(!(pc->emit[1] & 0xc000)); + assert(s <= 1); + pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10); + set_address_16(pc, i->src[s]); + } else + if (SFILE(i, s) == NV_FILE_GPR) { + SID(pc, i->src[s], 26); + } else + if (SFILE(i, s) == NV_FILE_IMM) { + assert(!(pc->emit[1] & 0xc000)); + assert(s == 1 || i->opcode == NV_OP_MOV); + set_immd(pc, i, s); + } + } +} + +static void +emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i) +{ + if (i->src[0]->mod & NV_MOD_ABS) + pc->emit[0] |= 1 << 7; + if (i->src[0]->mod & NV_MOD_NEG) + pc->emit[0] |= 1 << 9; 
+ if (i->src[1]->mod & NV_MOD_ABS) + pc->emit[0] |= 1 << 6; + if (i->src[1]->mod & NV_MOD_NEG) + pc->emit[0] |= 1 << 8; +} + +static void +emit_add_f32(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x50000000; + + emit_form_0(pc, i); + + emit_neg_abs_1_2(pc, i); + + if (i->saturate) + pc->emit[1] |= 1 << 17; +} + +static void +emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x58000000; + + emit_form_0(pc, i); + + if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) + pc->emit[1] |= 1 << 25; + + if (i->saturate) + pc->emit[0] |= 1 << 5; +} + +static void +emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x30000000; + + emit_form_0(pc, i); + + if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) + pc->emit[0] |= 1 << 9; + + if (i->src[2]->mod & NV_MOD_NEG) + pc->emit[0] |= 1 << 8; + + if (i->saturate) + pc->emit[0] |= 1 << 5; +} + +static void +emit_minmax(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x08000000; + + if (NV_BASEOP(i->opcode) == NV_OP_MAX) + pc->emit[1] |= 0x001e0000; + else + pc->emit[1] |= 0x000e0000; /* predicate ? 
*/ + + emit_form_0(pc, i); + + emit_neg_abs_1_2(pc, i); + + switch (i->opcode) { + case NV_OP_MIN_U32: + case NV_OP_MAX_U32: + pc->emit[0] |= 3; + break; + case NV_OP_MIN_S32: + case NV_OP_MAX_S32: + pc->emit[0] |= 3 | (1 << 5); + break; + case NV_OP_MIN_F32: + case NV_OP_MAX_F32: + default: + break; + } +} + +static void +emit_tex(struct nv_pc *pc, struct nv_instruction *i) +{ + int src1 = i->tex_array + i->tex_dim + i->tex_cube; + + assert(src1 < 6); + + pc->emit[0] = 0x00000086; + pc->emit[1] = 0x80000000; + + switch (i->opcode) { + case NV_OP_TEX: pc->emit[1] = 0x80000000; break; + case NV_OP_TXB: pc->emit[1] = 0x84000000; break; + case NV_OP_TXL: pc->emit[1] = 0x86000000; break; + case NV_OP_TXF: pc->emit[1] = 0x90000000; break; + case NV_OP_TXG: pc->emit[1] = 0xe0000000; break; + default: + assert(0); + break; + } + + if (i->tex_array) + pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */ + if (i->tex_shadow) + pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */ + + set_pred(pc, i); + + DID(pc, i->def[0], 14); + SID(pc, i->src[0], 20); + SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */ + + pc->emit[1] |= i->tex_mask << 14; + pc->emit[1] |= (i->tex_dim - 1) << 20; + if (i->tex_cube) + pc->emit[1] |= 3 << 20; + + assert(i->ext.tex.s < 16); + + pc->emit[1] |= i->ext.tex.t; + pc->emit[1] |= i->ext.tex.s << 8; + + if (i->tex_live) + pc->emit[0] |= 1 << 9; +} + +/* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */ +static void +emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0xc8000000; + + set_pred(pc, i); + + DID(pc, i->def[0], 14); + SID(pc, i->src[0], 20); + + pc->emit[0] |= op << 26; + + if (op >= 3) { + if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9; + if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7; + } else { + assert(!i->src[0]->mod); + } +} + +static void +emit_quadop(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 
0x00000000; + pc->emit[1] = 0x48000000; + + set_pred(pc, i); + + assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR); + + DID(pc, i->def[0], 14); + SID(pc, i->src[0], 20); + SID(pc, i->src[0], 26); + + pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */ + pc->emit[1] |= i->quadop; +} + +static void +emit_ddx(struct nv_pc *pc, struct nv_instruction *i) +{ + i->quadop = 0x99; + i->lanes = 4; + i->src[1] = i->src[0]; + emit_quadop(pc, i); +} + +static void +emit_ddy(struct nv_pc *pc, struct nv_instruction *i) +{ + i->quadop = 0xa5; + i->lanes = 5; + i->src[1] = i->src[0]; + emit_quadop(pc, i); +} + +/* preparation op (preex2, presin / convert to fixed point) */ +static void +emit_preop(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x60000000; + + if (i->opcode == NV_OP_PREEX2) + pc->emit[0] |= 0x20; + + emit_form_1(pc, i); + + if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8; + if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6; +} + +static void +emit_shift(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000003; + + switch (i->opcode) { + case NV_OP_SAR: + pc->emit[0] |= 0x20; /* fall through */ + case NV_OP_SHR: + pc->emit[1] = 0x58000000; + break; + case NV_OP_SHL: + default: + pc->emit[1] = 0x60000000; + break; + } + + emit_form_0(pc, i); +} + +static void +emit_bitop(struct nv_pc *pc, struct nv_instruction *i) +{ + if (SFILE(i, 1) == NV_FILE_IMM) { + pc->emit[0] = 0x00000002; + pc->emit[1] = 0x38000000; + } else { + pc->emit[0] = 0x00000003; + pc->emit[1] = 0x68000000; + } + + switch (i->opcode) { + case NV_OP_OR: + pc->emit[0] |= 0x40; + break; + case NV_OP_XOR: + pc->emit[0] |= 0x80; + break; + case NV_OP_AND: + default: + break; + } + + emit_form_0(pc, i); +} + +static void +emit_set(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + + switch (i->opcode) { + case NV_OP_SET_S32: + pc->emit[0] |= 0x20; /* fall through */ + case NV_OP_SET_U32: + 
pc->emit[0] |= 0x3; + pc->emit[1] = 0x100e0000; + break; + case NV_OP_SET_F32_AND: + pc->emit[1] = 0x18000000; + break; + case NV_OP_SET_F32_OR: + pc->emit[1] = 0x18200000; + break; + case NV_OP_SET_F32_XOR: + pc->emit[1] = 0x18400000; + break; + case NV_OP_FSET_F32: + pc->emit[0] |= 0x20; /* fall through */ + case NV_OP_SET_F32: + default: + pc->emit[1] = 0x180e0000; + break; + } + + if (DFILE(i, 0) == NV_FILE_PRED) { + pc->emit[0] |= 0x1c000; + pc->emit[1] += 0x08000000; + } + + pc->emit[1] |= i->set_cond << 23; + + emit_form_0(pc, i); + + emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */ +} + +static void +emit_selp(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000004; + pc->emit[1] = 0x20000000; + + emit_form_0(pc, i); + + if (i->cc || (i->src[2]->mod & NV_MOD_NOT)) + pc->emit[1] |= 1 << 20; +} + +static void +emit_slct(struct nv_pc *pc, struct nv_instruction *i) +{ + uint8_t cc = i->set_cond; + + pc->emit[0] = 0x00000000; + + switch (i->opcode) { + case NV_OP_SLCT_S32: + pc->emit[0] |= 0x20; /* fall through */ + case NV_OP_SLCT_U32: + pc->emit[0] |= 0x3; + pc->emit[1] = 0x30000000; + break; + case NV_OP_SLCT_F32: + default: + pc->emit[1] = 0x38000000; + break; + } + + emit_form_0(pc, i); + + if (i->src[2]->mod & NV_MOD_NEG) + cc = nvc0_ir_reverse_cc(cc); + + pc->emit[1] |= cc << 23; +} + +static void +emit_cvt(struct nv_pc *pc, struct nv_instruction *i) +{ + uint32_t rint; + + pc->emit[0] = 0x00000004; + pc->emit[1] = 0x10000000; + + /* if no type conversion specified, get type from opcode */ + if (i->opcode != NV_OP_CVT && i->ext.cvt.d == i->ext.cvt.s) + i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode); + + switch (i->ext.cvt.d) { + case NV_TYPE_F32: + switch (i->ext.cvt.s) { + case NV_TYPE_F32: pc->emit[1] = 0x10000000; break; + case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ + case NV_TYPE_U32: pc->emit[1] = 0x18000000; break; + } + break; + case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */ + 
case NV_TYPE_U32: + switch (i->ext.cvt.s) { + case NV_TYPE_F32: pc->emit[1] = 0x14000000; break; + case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ + case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break; + } + break; + default: + assert(!"cvt: unknown type"); + break; + } + + rint = (i->ext.cvt.d == NV_TYPE_F32) ? 1 << 7 : 0; + + if (i->opcode == NV_OP_FLOOR) { + pc->emit[0] |= rint; + pc->emit[1] |= 2 << 16; + } else + if (i->opcode == NV_OP_CEIL) { + pc->emit[0] |= rint; + pc->emit[1] |= 4 << 16; + } else + if (i->opcode == NV_OP_TRUNC) { + pc->emit[0] |= rint; + pc->emit[1] |= 6 << 16; + } + + if (i->saturate || i->opcode == NV_OP_SAT) + pc->emit[0] |= 0x20; + + if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS) + pc->emit[0] |= 1 << 6; + if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG) + pc->emit[0] |= 1 << 8; + + pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20; + pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23; + + emit_form_1(pc, i); +} + +static void +emit_interp(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0xc07e0000; + + DID(pc, i->def[0], 14); + + set_pred(pc, i); + + if (i->indirect) + SID(pc, i->src[i->indirect], 20); + else + SID(pc, NULL, 20); + + if (i->opcode == NV_OP_PINTERP) { + pc->emit[0] |= 0x040; + SID(pc, i->src[1], 26); + } else { + SID(pc, NULL, 26); + } + + pc->emit[1] |= i->src[0]->value->reg.address & 0xffff; + + if (i->centroid) + pc->emit[0] |= 0x100; + else + if (i->flat) + pc->emit[0] |= 0x080; +} + +static void +emit_vfetch(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x03f00006; + pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address; + if (i->patch) + pc->emit[0] |= 0x100; + + set_pred(pc, i); + + DVS(pc, i); + DID(pc, i->def[0], 14); + + SID(pc, (i->indirect >= 0) ? 
i->src[i->indirect] : NULL, 26); +} + +static void +emit_export(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000006; + pc->emit[1] = 0x0a000000; + if (i->patch) + pc->emit[0] |= 0x100; + + set_pred(pc, i); + + assert(SFILE(i, 0) == NV_FILE_MEM_V); + assert(SFILE(i, 1) == NV_FILE_GPR); + + SID(pc, i->src[1], 26); /* register source */ + SVS(pc, i->src[0]); + + pc->emit[1] |= i->src[0]->value->reg.address & 0xfff; + + SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); +} + +static void +emit_mov(struct nv_pc *pc, struct nv_instruction *i) +{ + if (i->opcode == NV_OP_MOV) + i->lanes = 0xf; + + if (SFILE(i, 0) == NV_FILE_IMM) { + pc->emit[0] = 0x000001e2; + pc->emit[1] = 0x18000000; + } else + if (SFILE(i, 0) == NV_FILE_PRED) { + pc->emit[0] = 0x1c000004; + pc->emit[1] = 0x080e0000; + } else { + pc->emit[0] = 0x00000004 | (i->lanes << 5); + pc->emit[1] = 0x28000000; + } + + emit_form_1(pc, i); +} + +static void +emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i) +{ + assert(NV_IS_MEMORY_FILE(SFILE(i, 0))); + + switch (SSIZE(i, 0)) { + case 1: + if (NV_TYPE_ISSGD(i->ext.cvt.s)) + pc->emit[0] |= 0x20; + break; + case 2: + pc->emit[0] |= 0x40; + if (NV_TYPE_ISSGD(i->ext.cvt.s)) + pc->emit[0] |= 0x20; + break; + case 4: pc->emit[0] |= 0x80; break; + case 8: pc->emit[0] |= 0xa0; break; + case 16: pc->emit[0] |= 0xc0; break; + default: + NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0)); + break; + } +} + +static void +emit_ld_common(struct nv_pc *pc, struct nv_instruction *i) +{ + emit_ldst_size(pc, i); + + set_pred(pc, i); + set_address_16(pc, i->src[0]); + + SID(pc, (i->indirect >= 0) ? 
i->src[i->indirect] : NULL, 20); + DID(pc, i->def[0], 14); +} + +static void +emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000006; + pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); + + emit_ld_common(pc, i); +} + +static void +emit_ld(struct nv_pc *pc, struct nv_instruction *i) +{ + if (SFILE(i, 0) >= NV_FILE_MEM_C(0) && + SFILE(i, 0) <= NV_FILE_MEM_C(15)) { + if (SSIZE(i, 0) == 4 && i->indirect < 0) { + i->lanes = 0xf; + emit_mov(pc, i); + } else { + emit_ld_const(pc, i); + } + } else + if (SFILE(i, 0) == NV_FILE_MEM_L) { + pc->emit[0] = 0x00000005; + pc->emit[1] = 0xc0000000; + + emit_ld_common(pc, i); + } else { + NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0)); + abort(); + } +} + +static void +emit_st(struct nv_pc *pc, struct nv_instruction *i) +{ + if (SFILE(i, 0) != NV_FILE_MEM_L) + NOUVEAU_ERR("emit_st(%u): file not handled yet\n", SFILE(i, 0)); + + pc->emit[0] = 0x00000005 | (0 << 8); /* write-back caching */ + pc->emit[1] = 0xc8000000; + + emit_ldst_size(pc, i); + + set_pred(pc, i); + set_address_16(pc, i->src[0]); + + SID(pc, (i->indirect >= 0) ? 
/* Encode one IR instruction into pc->emit[0..1].
 *
 * Dispatches on i->opcode to the matching emit_* helper.  Opcodes without
 * a case are reported through NOUVEAU_ERR and abort the process.  After the
 * opcode-specific encoding, bit 0x10 of word 0 is set when i->join is set.
 */
void
nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
{
   /* trace every instruction that gets emitted */
   debug_printf("EMIT: "); nvc0_print_instruction(i);

   switch (i->opcode) {
   case NV_OP_VFETCH:
      emit_vfetch(pc, i);
      break;
   case NV_OP_EXPORT:
      /* nothing is encoded for EXPORT in fragment programs */
      if (!pc->is_fragprog)
         emit_export(pc, i);
      break;
   case NV_OP_MOV:
      emit_mov(pc, i);
      break;
   case NV_OP_LD:
      emit_ld(pc, i);
      break;
   case NV_OP_ST:
      emit_st(pc, i);
      break;
   case NV_OP_LINTERP:
   case NV_OP_PINTERP:
      emit_interp(pc, i);
      break;
   case NV_OP_ADD_F32:
      emit_add_f32(pc, i);
      break;
   case NV_OP_AND:
   case NV_OP_OR:
   case NV_OP_XOR:
      emit_bitop(pc, i);
      break;
   /* conversions and the unary ops that are encoded as conversions */
   case NV_OP_CVT:
   case NV_OP_ABS_F32:
   case NV_OP_ABS_S32:
   case NV_OP_NEG_F32:
   case NV_OP_NEG_S32:
   case NV_OP_SAT:
   case NV_OP_CEIL:
   case NV_OP_FLOOR:
   case NV_OP_TRUNC:
      emit_cvt(pc, i);
      break;
   case NV_OP_DFDX:
      emit_ddx(pc, i);
      break;
   case NV_OP_DFDY:
      emit_ddy(pc, i);
      break;
   /* emit_flop sub-operation: 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */
   case NV_OP_COS:
      emit_flop(pc, i, 0);
      break;
   case NV_OP_SIN:
      emit_flop(pc, i, 1);
      break;
   case NV_OP_EX2:
      emit_flop(pc, i, 2);
      break;
   case NV_OP_LG2:
      emit_flop(pc, i, 3);
      break;
   case NV_OP_RCP:
      emit_flop(pc, i, 4);
      break;
   case NV_OP_RSQ:
      emit_flop(pc, i, 5);
      break;
   case NV_OP_PRESIN:
   case NV_OP_PREEX2:
      emit_preop(pc, i);
      break;
   case NV_OP_MAD_F32:
      emit_mad_f32(pc, i);
      break;
   case NV_OP_MAX_F32:
   case NV_OP_MAX_S32:
   case NV_OP_MAX_U32:
   case NV_OP_MIN_F32:
   case NV_OP_MIN_S32:
   case NV_OP_MIN_U32:
      emit_minmax(pc, i);
      break;
   case NV_OP_MUL_F32:
      emit_mul_f32(pc, i);
      break;
   case NV_OP_SET_F32:
   case NV_OP_SET_F32_AND:
   case NV_OP_SET_F32_OR:
   case NV_OP_SET_F32_XOR:
   case NV_OP_SET_S32:
   case NV_OP_SET_U32:
   case NV_OP_FSET_F32:
      emit_set(pc, i);
      break;
   case NV_OP_SHL:
   case NV_OP_SHR:
   case NV_OP_SAR:
      emit_shift(pc, i);
      break;
   /* NOTE(review): emit_tex also has encodings for NV_OP_TXF and NV_OP_TXG,
    * but those opcodes are not routed here and end up in the default case.
    */
   case NV_OP_TEX:
   case NV_OP_TXB:
   case NV_OP_TXL:
      emit_tex(pc, i);
      break;
   /* control flow: the last argument is passed through to emit_flow() */
   case NV_OP_BRA:
      emit_flow(pc, i, 0x40);
      break;
   case NV_OP_CALL:
      emit_flow(pc, i, 0x50);
      break;
   case NV_OP_JOINAT:
      emit_flow(pc, i, 0x60);
      break;
   case NV_OP_EXIT:
      emit_flow(pc, i, 0x80);
      break;
   case NV_OP_RET:
      emit_flow(pc, i, 0x90);
      break;
   case NV_OP_KIL:
      emit_flow(pc, i, 0x98);
      break;
   /* JOIN shares the fixed NOP encoding; the join bit is added below */
   case NV_OP_JOIN:
   case NV_OP_NOP:
      pc->emit[0] = 0x00003de4;
      pc->emit[1] = 0x40000000;
      break;
   case NV_OP_SELP:
      emit_selp(pc, i);
      break;
   case NV_OP_SLCT_F32:
   case NV_OP_SLCT_S32:
   case NV_OP_SLCT_U32:
      emit_slct(pc, i);
      break;
   default:
      NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
      abort();
      break;
   }

   if (i->join)
      pc->emit[0] |= 0x10;
}
IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +#define DESCEND_ARBITRARY(j, f) \ +do { \ + b->pass_seq = ctx->pc->pass_seq; \ + \ + for (j = 0; j < 2; ++j) \ + if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \ + f(ctx, b->out[j]); \ +} while (0) + +static INLINE boolean +registers_interfere(struct nv_value *a, struct nv_value *b) +{ + if (a->reg.file != b->reg.file) + return FALSE; + if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file)) + return FALSE; + + assert(a->join->reg.id >= 0 && b->join->reg.id >= 0); + + if (a->join->reg.id < b->join->reg.id) { + return (a->join->reg.id + a->reg.size >= b->join->reg.id); + } else + if (a->join->reg.id > b->join->reg.id) { + return (b->join->reg.id + b->reg.size >= a->join->reg.id); + } + + return FALSE; +} + +static INLINE boolean +values_equal(struct nv_value *a, struct nv_value *b) +{ + if (a->reg.file != b->reg.file || a->reg.size != b->reg.size) + return FALSE; + if (NV_IS_MEMORY_FILE(a->reg.file)) + return a->reg.address == b->reg.address; + else + return a->join->reg.id == b->join->reg.id; +} + +#if 0 +static INLINE boolean +inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b) +{ + int si, di; + + for (di = 0; di < 4 && a->def[di]; ++di) + for (si = 0; si < 5 && b->src[si]; ++si) + if (registers_interfere(a->def[di], b->src[si]->value)) + return FALSE; + + return TRUE; +} + +/* Check whether we can swap the order of the instructions, + * where a & b may be either the earlier or the later one. 
+ */ +static boolean +inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b) +{ + return inst_commutation_check(a, b) && inst_commutation_check(b, a); +} +#endif + +static INLINE boolean +inst_removable(struct nv_instruction *nvi) +{ + if (nvi->opcode == NV_OP_ST) + return FALSE; + return (!(nvi->terminator || + nvi->join || + nvi->target || + nvi->fixed || + nvc0_insn_refcount(nvi))); +} + +/* Check if we do not actually have to emit this instruction. */ +static INLINE boolean +inst_is_noop(struct nv_instruction *nvi) +{ + if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND) + return TRUE; + if (nvi->terminator || nvi->join) + return FALSE; + if (nvi->def[0] && nvi->def[0]->join->reg.id < 0) + return TRUE; + if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT) + return FALSE; + if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file) + return FALSE; + + if (nvi->src[0]->value->join->reg.id < 0) { + NOUVEAU_DBG("inst_is_noop: orphaned value detected\n"); + return TRUE; + } + + if (nvi->opcode == NV_OP_SELECT) + if (!values_equal(nvi->def[0], nvi->src[1]->value)) + return FALSE; + return values_equal(nvi->def[0], nvi->src[0]->value); +} + +struct nv_pass { + struct nv_pc *pc; + int n; + void *priv; +}; + +static int +nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b); + +static void +nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) +{ + struct nv_pc *pc = (struct nv_pc *)priv; + struct nv_basic_block *in; + struct nv_instruction *nvi, *next; + int j; + + /* find first non-empty block emitted before b */ + for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j); + + for (; j >= 0; --j) { + in = pc->bb_list[j]; + + /* check for no-op branches (BRA $PC+8) */ + if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) { + in->emit_size -= 8; + pc->emit_size -= 8; + + for (++j; j < pc->num_blocks; ++j) + pc->bb_list[j]->emit_pos -= 8; + + nvc0_insn_delete(in->exit); + } + 
b->emit_pos = in->emit_pos + in->emit_size; + + if (in->emit_size) /* no more no-op branches to b */ + break; + } + + pc->bb_list[pc->num_blocks++] = b; + + /* visit node */ + + for (nvi = b->entry; nvi; nvi = next) { + next = nvi->next; + if (inst_is_noop(nvi) || + (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) { + nvc0_insn_delete(nvi); + } else + b->emit_size += 8; + } + pc->emit_size += b->emit_size; + +#ifdef NOUVEAU_DEBUG + if (!b->entry) + debug_printf("BB:%i is now empty\n", b->id); + else + debug_printf("BB:%i size = %u\n", b->id, b->emit_size); +#endif +} + +static int +nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root) +{ + struct nv_pass pass; + + pass.pc = pc; + + pc->pass_seq++; + nv_pass_flatten(&pass, root); + + nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc); + + return 0; +} + +int +nvc0_pc_exec_pass2(struct nv_pc *pc) +{ + int i, ret; + + NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks); + + pc->num_blocks = 0; /* will reorder bb_list */ + + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i]))) + return ret; + return 0; +} + +static INLINE boolean +is_cspace_load(struct nv_instruction *nvi) +{ + if (!nvi) + return FALSE; + assert(nvi->indirect != 0); + return (nvi->opcode == NV_OP_LD && + nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && + nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15)); +} + +static INLINE boolean +is_immd32_load(struct nv_instruction *nvi) +{ + if (!nvi) + return FALSE; + return (nvi->opcode == NV_OP_MOV && + nvi->src[0]->value->reg.file == NV_FILE_IMM && + nvi->src[0]->value->reg.size == 4); +} + +static INLINE void +check_swap_src_0_1(struct nv_instruction *nvi) +{ + struct nv_ref *src0 = nvi->src[0]; + struct nv_ref *src1 = nvi->src[1]; + + if (!nv_op_commutative(nvi->opcode) && + NV_BASEOP(nvi->opcode) != NV_OP_SET && + NV_BASEOP(nvi->opcode) != NV_OP_SLCT) + return; + assert(src0 && src1 && src0->value && src1->value); + + if 
(src1->value->reg.file != NV_FILE_GPR) + return; + + if (is_cspace_load(src0->value->insn)) { + if (!is_cspace_load(src1->value->insn)) { + nvi->src[0] = src1; + nvi->src[1] = src0; + } + } else + if (is_immd32_load(src0->value->insn)) { + if (!is_cspace_load(src1->value->insn) && + !is_immd32_load(src1->value->insn)) { + nvi->src[0] = src1; + nvi->src[1] = src0; + } + } + + if (nvi->src[0] != src0) { + if (NV_BASEOP(nvi->opcode) == NV_OP_SET) + nvi->set_cond = nvc0_ir_reverse_cc(nvi->set_cond); + else + if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT) + nvi->set_cond = NV_CC_INVERSE(nvi->set_cond); + } +} + +static void +nvi_set_indirect_load(struct nv_pc *pc, + struct nv_instruction *nvi, struct nv_value *val) +{ + for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect]; + ++nvi->indirect); + assert(nvi->indirect < 6); + nv_reference(pc, nvi, nvi->indirect, val); +} + +static int +nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *nvi, *ld; + int s; + + for (nvi = b->entry; nvi; nvi = nvi->next) { + check_swap_src_0_1(nvi); + + for (s = 0; s < 3 && nvi->src[s]; ++s) { + ld = nvi->src[s]->value->insn; + if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV)) + continue; + if (!nvc0_insn_can_load(nvi, s, ld)) + continue; + + /* fold it ! */ + nv_reference(ctx->pc, nvi, s, ld->src[0]->value); + if (ld->indirect >= 0) + nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value); + + if (!nvc0_insn_refcount(ld)) + nvc0_insn_delete(ld); + } + } + DESCEND_ARBITRARY(s, nvc0_pass_fold_loads); + + return 0; +} + +/* NOTE: Assumes loads have not yet been folded. 
*/ +static int +nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *nvi, *mi, *next; + int j; + uint8_t mod; + + for (nvi = b->entry; nvi; nvi = next) { + next = nvi->next; + if (nvi->opcode == NV_OP_SUB) { + nvi->src[1]->mod ^= NV_MOD_NEG; + nvi->opcode = NV_OP_ADD; + } + + for (j = 0; j < 3 && nvi->src[j]; ++j) { + mi = nvi->src[j]->value->insn; + if (!mi) + continue; + if (mi->def[0]->refc > 1 || mi->predicate >= 0) + continue; + + if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG; + else + if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS; + else + continue; + assert(!(mod & mi->src[0]->mod & NV_MOD_NEG)); + + mod |= mi->src[0]->mod; + + if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) { + /* abs neg [abs] = abs */ + mod &= ~(NV_MOD_NEG | NV_MOD_ABS); + } else + if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) { + /* neg as opcode and modifier on same insn cannot occur */ + /* neg neg abs = abs, neg neg = identity */ + assert(j == 0); + if (mod & NV_MOD_ABS) + nvi->opcode = NV_OP_ABS; + else + nvi->opcode = NV_OP_MOV; + mod = 0; + } + + if ((nv_op_supported_src_mods(nvi->opcode, j) & mod) != mod) + continue; + + nv_reference(ctx->pc, nvi, j, mi->src[0]->value); + + nvi->src[j]->mod ^= mod; + } + + if (nvi->opcode == NV_OP_SAT) { + mi = nvi->src[0]->value->insn; + + if (mi->def[0]->refc > 1 || + (mi->opcode != NV_OP_ADD && + mi->opcode != NV_OP_MUL && + mi->opcode != NV_OP_MAD)) + continue; + mi->saturate = 1; + mi->def[0] = nvi->def[0]; + mi->def[0]->insn = mi; + nvc0_insn_delete(nvi); + } + } + DESCEND_ARBITRARY(j, nv_pass_lower_mods); + + return 0; +} + +#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL) + +static void +apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod) +{ + if (mod & NV_MOD_ABS) { + if (type == NV_TYPE_F32) + *val &= 0x7fffffff; + else + if ((*val) & (1 << 31)) + *val = ~(*val) + 1; + } + if (mod & NV_MOD_NEG) { + if (type == NV_TYPE_F32) + 
*val ^= 0x80000000; + else + *val = ~(*val) + 1; + } + if (mod & NV_MOD_SAT) { + union { + float f; + uint32_t u; + int32_t i; + } u; + u.u = *val; + if (type == NV_TYPE_F32) { + u.f = CLAMP(u.f, -1.0f, 1.0f); + } else + if (type == NV_TYPE_U16) { + u.u = MIN2(u.u, 0xffff); + } else + if (type == NV_TYPE_S16) { + u.i = CLAMP(u.i, -32768, 32767); + } + *val = u.u; + } + if (mod & NV_MOD_NOT) + *val = ~*val; +} + +static void +constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, + struct nv_value *src0, struct nv_value *src1) +{ + struct nv_value *val; + union { + float f32; + uint32_t u32; + int32_t s32; + } u0, u1, u; + ubyte type; + + if (!nvi->def[0]) + return; + type = NV_OPTYPE(nvi->opcode); + + u.u32 = 0; + u0.u32 = src0->reg.imm.u32; + u1.u32 = src1->reg.imm.u32; + + apply_modifiers(&u0.u32, type, nvi->src[0]->mod); + apply_modifiers(&u1.u32, type, nvi->src[1]->mod); + + switch (nvi->opcode) { + case NV_OP_MAD_F32: + if (nvi->src[2]->value->reg.file != NV_FILE_GPR) + return; + /* fall through */ + case NV_OP_MUL_F32: + u.f32 = u0.f32 * u1.f32; + break; + case NV_OP_MUL_B32: + u.u32 = u0.u32 * u1.u32; + break; + case NV_OP_ADD_F32: + u.f32 = u0.f32 + u1.f32; + break; + case NV_OP_ADD_B32: + u.u32 = u0.u32 + u1.u32; + break; + case NV_OP_SUB_F32: + u.f32 = u0.f32 - u1.f32; + break; + /* + case NV_OP_SUB_B32: + u.u32 = u0.u32 - u1.u32; + break; + */ + default: + return; + } + + val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type)); + val->reg.imm.u32 = u.u32; + + nv_reference(pc, nvi, 1, NULL); + nv_reference(pc, nvi, 0, val); + + if (nvi->opcode == NV_OP_MAD_F32) { + nvi->src[1] = nvi->src[0]; + nvi->src[0] = nvi->src[2]; + nvi->src[2] = NULL; + nvi->opcode = NV_OP_ADD_F32; + + if (val->reg.imm.u32 == 0) { + nvi->src[1] = NULL; + nvi->opcode = NV_OP_MOV; + } + } else { + nvi->opcode = NV_OP_MOV; + } +} + +static void +constant_operand(struct nv_pc *pc, + struct nv_instruction *nvi, struct nv_value *val, int s) +{ + union { + float f32; + uint32_t 
u32; + int32_t s32; + } u; + int shift; + int t = s ? 0 : 1; + uint op; + ubyte type; + + if (!nvi->def[0]) + return; + type = NV_OPTYPE(nvi->opcode); + + u.u32 = val->reg.imm.u32; + apply_modifiers(&u.u32, type, nvi->src[s]->mod); + + if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) { + nvi->opcode = NV_OP_MOV; + nv_reference(pc, nvi, t, NULL); + if (s) { + nvi->src[0] = nvi->src[1]; + nvi->src[1] = NULL; + } + return; + } + + switch (nvi->opcode) { + case NV_OP_MUL_F32: + if (u.f32 == 1.0f || u.f32 == -1.0f) { + if (u.f32 == -1.0f) + nvi->src[t]->mod ^= NV_MOD_NEG; + switch (nvi->src[t]->mod) { + case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break; + case NV_MOD_NEG: op = NV_OP_NEG_F32; break; + case NV_MOD_ABS: op = NV_OP_ABS_F32; break; + default: + return; + } + nvi->opcode = op; + nv_reference(pc, nvi, 0, nvi->src[t]->value); + nv_reference(pc, nvi, 1, NULL); + nvi->src[0]->mod = 0; + } else + if (u.f32 == 2.0f || u.f32 == -2.0f) { + if (u.f32 == -2.0f) + nvi->src[t]->mod ^= NV_MOD_NEG; + nvi->opcode = NV_OP_ADD_F32; + nv_reference(pc, nvi, s, nvi->src[t]->value); + nvi->src[s]->mod = nvi->src[t]->mod; + } + break; + case NV_OP_ADD_F32: + if (u.u32 == 0) { + switch (nvi->src[t]->mod) { + case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break; + case NV_MOD_NEG: op = NV_OP_NEG_F32; break; + case NV_MOD_ABS: op = NV_OP_ABS_F32; break; + case NV_MOD_NEG | NV_MOD_ABS: + op = NV_OP_CVT; + nvi->ext.cvt.s = nvi->ext.cvt.d = type; + break; + default: + return; + } + nvi->opcode = op; + nv_reference(pc, nvi, 0, nvi->src[t]->value); + nv_reference(pc, nvi, 1, NULL); + if (nvi->opcode != NV_OP_CVT) + nvi->src[0]->mod = 0; + } + break; + case NV_OP_ADD_B32: + if (u.u32 == 0) { + assert(nvi->src[t]->mod == 0); + nvi->opcode = nvi->saturate ? 
NV_OP_CVT : NV_OP_MOV; + nvi->ext.cvt.s = nvi->ext.cvt.d = type; + nv_reference(pc, nvi, 0, nvi->src[t]->value); + nv_reference(pc, nvi, 1, NULL); + } + break; + case NV_OP_MUL_B32: + /* multiplication by 0 already handled above */ + assert(nvi->src[s]->mod == 0); + shift = ffs(u.s32) - 1; + if (shift == 0) { + nvi->opcode = NV_OP_MOV; + nv_reference(pc, nvi, 0, nvi->src[t]->value); + nv_reference(pc, nvi, 1, NULL); + } else + if (u.s32 > 0 && u.s32 == (1 << shift)) { + nvi->opcode = NV_OP_SHL; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift; + nv_reference(pc, nvi, 0, nvi->src[t]->value); + nv_reference(pc, nvi, 1, val); + break; + } + break; + case NV_OP_RCP: + u.f32 = 1.0f / u.f32; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; + nvi->opcode = NV_OP_MOV; + assert(s == 0); + nv_reference(pc, nvi, 0, val); + break; + case NV_OP_RSQ: + u.f32 = 1.0f / sqrtf(u.f32); + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; + nvi->opcode = NV_OP_MOV; + assert(s == 0); + nv_reference(pc, nvi, 0, val); + break; + default: + break; + } +} + +static void +handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi) +{ + struct nv_value *src0 = nvi->src[0]->value; + struct nv_value *src1 = nvi->src[1]->value; + + if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod)) + return; + if (src0->reg.file != NV_FILE_GPR) + return; + nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0); + nvc0_insn_delete(nvi); +} + +/* check if we can MUL + ADD -> MAD/FMA */ +static void +handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi) +{ + struct nv_value *src0 = nvi->src[0]->value; + struct nv_value *src1 = nvi->src[1]->value; + struct nv_value *src; + int s; + uint8_t mod[4]; + + if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0; + else + if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1; + else + return; + + if ((src0->insn && src0->insn->bb != nvi->bb) || + (src1->insn && src1->insn->bb != nvi->bb)) + return; + + /* check for immediates from prior 
constant folding */ + if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) + return; + src = nvi->src[s]->value; + + mod[0] = nvi->src[0]->mod; + mod[1] = nvi->src[1]->mod; + mod[2] = src->insn->src[0]->mod; + mod[3] = src->insn->src[1]->mod; + + if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) + return; + + nvi->opcode = NV_OP_MAD_F32; + + nv_reference(ctx->pc, nvi, s, NULL); + nvi->src[2] = nvi->src[!s]; + nvi->src[!s] = NULL; + + nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value); + nvi->src[0]->mod = mod[2] ^ mod[s]; + nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value); + nvi->src[1]->mod = mod[3]; +} + +static int +nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *nvi, *next; + int j; + + for (nvi = b->entry; nvi; nvi = next) { + struct nv_value *src0, *src1; + uint baseop = NV_BASEOP(nvi->opcode); + + next = nvi->next; + + src0 = nvc0_pc_find_immediate(nvi->src[0]); + src1 = nvc0_pc_find_immediate(nvi->src[1]); + + if (src0 && src1) { + constant_expression(ctx->pc, nvi, src0, src1); + } else { + if (src0) + constant_operand(ctx->pc, nvi, src0, 0); + else + if (src1) + constant_operand(ctx->pc, nvi, src1, 1); + } + + if (baseop == NV_OP_MIN || baseop == NV_OP_MAX) + handle_min_max(ctx, nvi); + else + if (nvi->opcode == NV_OP_ADD_F32) + handle_add_mul(ctx, nvi); + } + DESCEND_ARBITRARY(j, nv_pass_algebraic_opt); + + return 0; +} + +/* TODO: redundant store elimination */ + +struct mem_record { + struct mem_record *next; + struct nv_instruction *insn; + uint32_t ofst; + uint32_t base; + uint32_t size; +}; + +#define MEM_RECORD_POOL_SIZE 1024 + +struct pass_reld_elim { + struct nv_pc *pc; + + struct mem_record *imm; + struct mem_record *mem_v; + struct mem_record *mem_a; + struct mem_record *mem_c[16]; + struct mem_record *mem_l; + + struct mem_record pool[MEM_RECORD_POOL_SIZE]; + int alloc; +}; + +/* Extend the load operation in @rec to also cover the data loaded by @ld. 
+ * The two loads may not overlap but reference adjacent memory locations. + */ +static void +combine_load(struct nv_pc *pc, struct mem_record *rec, + struct nv_instruction *ld) +{ + struct nv_instruction *fv = rec->insn; + struct nv_value *mem = ld->src[0]->value; + uint32_t size = rec->size + mem->reg.size; + int j; + int d = rec->size / 4; + + assert(rec->size < 16); + if (rec->ofst > mem->reg.address) { + if ((size == 8 && mem->reg.address & 3) || + (size > 8 && mem->reg.address & 7)) + return; + rec->ofst = mem->reg.address; + for (j = 0; j < d; ++j) + fv->def[mem->reg.size / 4 + j] = fv->def[j]; + d = 0; + } else + if ((size == 8 && rec->ofst & 3) || + (size > 8 && rec->ofst & 7)) { + return; + } + + for (j = 0; j < mem->reg.size / 4; ++j) { + fv->def[d] = ld->def[j]; + fv->def[d++]->insn = fv; + } + + if (fv->src[0]->value->refc > 1) + nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value)); + fv->src[0]->value->reg.address = rec->ofst; + fv->src[0]->value->reg.size = rec->size = size; + + nvc0_insn_delete(ld); +} + +static void +combine_export(struct mem_record *rec, struct nv_instruction *ex) +{ + +} + +static INLINE void +add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec, + uint32_t base, uint32_t ofst, struct nv_instruction *nvi) +{ + struct mem_record *it = &ctx->pool[ctx->alloc++]; + + it->next = *rec; + *rec = it; + it->base = base; + it->ofst = ofst; + it->insn = nvi; + it->size = nvi->src[0]->value->reg.size; +} + +/* vectorize and reuse loads from memory or of immediates */ +static int +nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) +{ + struct mem_record **rec, *it; + struct nv_instruction *ld, *next; + struct nv_value *mem; + uint32_t base, ofst; + int s; + + for (ld = b->entry; ld; ld = next) { + next = ld->next; + + if (is_cspace_load(ld)) { + mem = ld->src[0]->value; + rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)]; + } else + if (ld->opcode == NV_OP_VFETCH) { + mem = 
ld->src[0]->value; + rec = &ctx->mem_a; + } else + if (ld->opcode == NV_OP_EXPORT) { + mem = ld->src[0]->value; + if (mem->reg.file != NV_FILE_MEM_V) + continue; + rec = &ctx->mem_v; + } else { + continue; + } + if (ld->def[0] && ld->def[0]->refc == 0) + continue; + ofst = mem->reg.address; + base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0; + + for (it = *rec; it; it = it->next) { + if (it->base == base && + ((it->ofst >> 4) == (ofst >> 4)) && + ((it->ofst + it->size == ofst) || + (it->ofst - mem->reg.size == ofst))) { + /* only NV_OP_VFETCH can load exactly 12 bytes */ + if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12) + continue; + if (it->ofst < ofst) { + if ((it->ofst & 0xf) == 4) + continue; + } else + if ((ofst & 0xf) == 4) + continue; + break; + } + } + if (it) { + switch (ld->opcode) { + case NV_OP_EXPORT: combine_export(it, ld); break; + default: + combine_load(ctx->pc, it, ld); + break; + } + } else + if (ctx->alloc < MEM_RECORD_POOL_SIZE) { + add_mem_record(ctx, rec, base, ofst, ld); + } + } + + ctx->alloc = 0; + ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL; + for (s = 0; s < 16; ++s) + ctx->mem_c[s] = NULL; + + DESCEND_ARBITRARY(s, nv_pass_mem_opt); + return 0; +} + +static void +eliminate_store(struct mem_record *rec, struct nv_instruction *st) +{ +} + +/* elimination of redundant stores */ +static int +pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b) +{ + struct mem_record **rec, *it; + struct nv_instruction *st, *next; + struct nv_value *mem; + uint32_t base, ofst, size; + int s; + + for (st = b->entry; st; st = next) { + next = st->next; + + if (st->opcode == NV_OP_ST) { + mem = st->src[0]->value; + rec = &ctx->mem_l; + } else + if (st->opcode == NV_OP_EXPORT) { + mem = st->src[0]->value; + if (mem->reg.file != NV_FILE_MEM_V) + continue; + rec = &ctx->mem_v; + } else + if (st->opcode == NV_OP_ST) { + /* TODO: purge */ + } + ofst = mem->reg.address; + base = (st->indirect >= 0) ? 
st->src[st->indirect]->value->n : 0; + size = mem->reg.size; + + for (it = *rec; it; it = it->next) { + if (it->base == base && + (it->ofst <= ofst && (it->ofst + size) > ofst)) + break; + } + if (it) + eliminate_store(it, st); + else + add_mem_record(ctx, rec, base, ofst, st); + } + + DESCEND_ARBITRARY(s, nv_pass_mem_opt); + return 0; +} + +/* TODO: properly handle loads from l[] memory in the presence of stores */ +static int +nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b) +{ +#if 0 + struct load_record **rec, *it; + struct nv_instruction *ld, *next; + uint64_t data[2]; + struct nv_value *val; + int j; + + for (ld = b->entry; ld; ld = next) { + next = ld->next; + if (!ld->src[0]) + continue; + val = ld->src[0]->value; + rec = NULL; + + if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) { + data[0] = val->reg.id; + data[1] = 0; + rec = &ctx->mem_v; + } else + if (ld->opcode == NV_OP_LDA) { + data[0] = val->reg.id; + data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL; + if (val->reg.file >= NV_FILE_MEM_C(0) && + val->reg.file <= NV_FILE_MEM_C(15)) + rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)]; + else + if (val->reg.file == NV_FILE_MEM_S) + rec = &ctx->mem_s; + else + if (val->reg.file == NV_FILE_MEM_L) + rec = &ctx->mem_l; + } else + if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) { + data[0] = val->reg.imm.u32; + data[1] = 0; + rec = &ctx->imm; + } + + if (!rec || !ld->def[0]->refc) + continue; + + for (it = *rec; it; it = it->next) + if (it->data[0] == data[0] && it->data[1] == data[1]) + break; + + if (it) { + if (ld->def[0]->reg.id >= 0) + it->value = ld->def[0]; + else + if (!ld->fixed) + nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value); + } else { + if (ctx->alloc == LOAD_RECORD_POOL_SIZE) + continue; + it = &ctx->pool[ctx->alloc++]; + it->next = *rec; + it->data[0] = data[0]; + it->data[1] = data[1]; + it->value = ld->def[0]; + *rec = it; + } + } + + ctx->imm = NULL; + ctx->mem_s = NULL; 
+ ctx->mem_v = NULL; + for (j = 0; j < 16; ++j) + ctx->mem_c[j] = NULL; + ctx->mem_l = NULL; + ctx->alloc = 0; + + DESCEND_ARBITRARY(j, nv_pass_reload_elim); +#endif + return 0; +} + +static int +nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b) +{ + int i, c, j; + + for (i = 0; i < ctx->pc->num_instructions; ++i) { + struct nv_instruction *nvi = &ctx->pc->instructions[i]; + struct nv_value *def[4]; + + if (!nv_is_texture_op(nvi->opcode)) + continue; + nvi->tex_mask = 0; + + for (c = 0; c < 4; ++c) { + if (nvi->def[c]->refc) + nvi->tex_mask |= 1 << c; + def[c] = nvi->def[c]; + } + + j = 0; + for (c = 0; c < 4; ++c) + if (nvi->tex_mask & (1 << c)) + nvi->def[j++] = def[c]; + for (c = 0; c < 4; ++c) + if (!(nvi->tex_mask & (1 << c))) + nvi->def[j++] = def[c]; + assert(j == 4); + } + return 0; +} + +struct nv_pass_dce { + struct nv_pc *pc; + uint removed; +}; + +static int +nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b) +{ + int j; + struct nv_instruction *nvi, *next; + + for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) { + next = nvi->next; + + if (inst_removable(nvi)) { + nvc0_insn_delete(nvi); + ++ctx->removed; + } + } + DESCEND_ARBITRARY(j, nv_pass_dce); + + return 0; +} + +/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE. + * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with + * BREAK and dummy ELSE block. + */ +static INLINE boolean +bb_is_if_else_endif(struct nv_basic_block *bb) +{ + if (!bb->out[0] || !bb->out[1]) + return FALSE; + + if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) { + return (bb->out[0]->out[1] == bb->out[1]->out[0] && + !bb->out[1]->out[1]); + } else { + return (bb->out[0]->out[0] == bb->out[1]->out[0] && + !bb->out[0]->out[1] && + !bb->out[1]->out[1]); + } +} + +/* Predicate instructions and delete any branch at the end if it is + * not a break from a loop. 
+ */ +static void +predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, + struct nv_value *pred, uint8_t cc) +{ + struct nv_instruction *nvi, *prev; + int s; + + if (!b->entry) + return; + for (nvi = b->entry; nvi; nvi = nvi->next) { + prev = nvi; + if (inst_is_noop(nvi)) + continue; + for (s = 0; nvi->src[s]; ++s); + assert(s < 6); + nvi->predicate = s; + nvi->cc = cc; + nv_reference(pc, nvi, nvi->predicate, pred); + } + if (prev->opcode == NV_OP_BRA && + b->out_kind[0] != CFG_EDGE_LOOP_LEAVE && + b->out_kind[1] != CFG_EDGE_LOOP_LEAVE) + nvc0_insn_delete(prev); +} + +static INLINE boolean +may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred) +{ + if (nvi->def[0] && values_equal(nvi->def[0], pred)) + return FALSE; + return nvc0_insn_is_predicateable(nvi); +} + +/* Transform IF/ELSE/ENDIF constructs into predicated instructions + * where feasible. + */ +static int +nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *nvi; + struct nv_value *pred; + int k; + int n0, n1; /* instruction counts of outgoing blocks */ + + if (bb_is_if_else_endif(b)) { + assert(b->exit && b->exit->opcode == NV_OP_BRA); + + assert(b->exit->predicate >= 0); + pred = b->exit->src[b->exit->predicate]->value; + + n1 = n0 = 0; + for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) + if (!may_predicate_insn(nvi, pred)) + break; + if (!nvi) { + /* we're after register allocation, so there always is an ELSE block */ + for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) + if (!may_predicate_insn(nvi, pred)) + break; + } + + /* 12 is an arbitrary limit */ + if (!nvi && n0 < 12 && n1 < 12) { + predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc); + predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc); + + nvc0_insn_delete(b->exit); /* delete the branch */ + + /* and a potential joinat before it */ + if (b->exit && b->exit->opcode == NV_OP_JOINAT) + nvc0_insn_delete(b->exit); + + /* remove join operations 
at the end of the conditional */ + k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0; + if ((nvi = b->out[0]->out[k]->entry)) { + nvi->join = 0; + if (nvi->opcode == NV_OP_JOIN) + nvc0_insn_delete(nvi); + } + } + } + DESCEND_ARBITRARY(k, nv_pass_flatten); + + return 0; +} + +/* Tests instructions for equality, but independently of sources. */ +static boolean +is_operation_equal(struct nv_instruction *a, struct nv_instruction *b) +{ + if (a->opcode != b->opcode) + return FALSE; + if (nv_is_texture_op(a->opcode)) { + if (a->ext.tex.t != b->ext.tex.t || + a->ext.tex.s != b->ext.tex.s) + return FALSE; + if (a->tex_dim != b->tex_dim || + a->tex_array != b->tex_array || + a->tex_cube != b->tex_cube || + a->tex_shadow != b->tex_shadow || + a->tex_live != b->tex_live) + return FALSE; + } else + if (a->opcode == NV_OP_CVT) { + if (a->ext.cvt.s != b->ext.cvt.s || + a->ext.cvt.d != b->ext.cvt.d) + return FALSE; + } else + if (NV_BASEOP(a->opcode) == NV_OP_SET || + NV_BASEOP(a->opcode) == NV_OP_SLCT) { + if (a->set_cond != b->set_cond) + return FALSE; + } else + if (a->opcode == NV_OP_LINTERP || + a->opcode == NV_OP_PINTERP) { + if (a->centroid != b->centroid || + a->flat != b->flat) + return FALSE; + } + if (a->cc != b->cc) + return FALSE; + if (a->lanes != b->lanes || + a->patch != b->patch || + a->saturate != b->saturate) + return FALSE; + if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */ + return FALSE; + return TRUE; +} + +/* local common subexpression elimination, stupid O(n^2) implementation */ +static int +nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *ir, *ik, *next; + struct nv_instruction *entry = b->phi ? 
b->phi : b->entry; + int s, d; + unsigned int reps; + + do { + reps = 0; + for (ir = entry; ir; ir = next) { + next = ir->next; + if (ir->fixed) + continue; + for (ik = entry; ik != ir; ik = ik->next) { + if (!is_operation_equal(ir, ik)) + continue; + if (!ir->def[0] || !ik->def[0]) + continue; + + if (ik->indirect != ir->indirect || ik->predicate != ir->predicate) + continue; + + for (d = 0; d < 4; ++d) { + if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0)) + break; + if (ir->def[d]) { + if (!values_equal(ik->def[0], ir->def[0])) + break; + } else { + d = 4; + break; + } + } + if (d != 4) + continue; + + for (s = 0; s < 5; ++s) { + struct nv_value *a, *b; + + if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0)) + break; + if (!ir->src[s]) { + s = 5; + break; + } + + if (ik->src[s]->mod != ir->src[s]->mod) + break; + a = ik->src[s]->value; + b = ir->src[s]->value; + if (a == b) + continue; + if (a->reg.file != b->reg.file || + a->reg.id < 0 || /* this excludes memory loads/stores */ + a->reg.id != b->reg.id) + break; + } + if (s == 5) { + nvc0_insn_delete(ir); + for (d = 0; d < 4 && ir->def[d]; ++d) + nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]); + ++reps; + break; + } + } + } + } while(reps); + + DESCEND_ARBITRARY(s, nv_pass_cse); + + return 0; +} + +/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy + * neighbouring registers. CSE might have messed this up. + * Just generate a MOV for each source to avoid conflicts if they're used in + * multiple NV_OP_BIND at different positions. 
+ */ +static int +nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_value *val; + struct nv_instruction *bnd, *nvi, *next; + int s; + + for (bnd = b->entry; bnd; bnd = next) { + next = bnd->next; + if (bnd->opcode != NV_OP_BIND) + continue; + for (s = 0; s < 4 && bnd->src[s]; ++s) { + val = bnd->src[s]->value; + + nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); + nvi->def[0] = new_value_like(ctx->pc, val); + nvi->def[0]->insn = nvi; + nv_reference(ctx->pc, nvi, 0, val); + nv_reference(ctx->pc, bnd, s, nvi->def[0]); + + nvc0_insn_insert_before(bnd, nvi); + } + } + DESCEND_ARBITRARY(s, nv_pass_fix_bind); + + return 0; +} + +static int +nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) +{ + struct pass_reld_elim *reldelim; + struct nv_pass pass; + struct nv_pass_dce dce; + int ret; + + pass.n = 0; + pass.pc = pc; + + /* Do CSE so we can just compare values by pointer in subsequent passes. */ + pc->pass_seq++; + ret = nv_pass_cse(&pass, root); + if (ret) + return ret; + + /* Do this first, so we don't have to pay attention + * to whether sources are supported memory loads. + */ + pc->pass_seq++; + ret = nv_pass_algebraic_opt(&pass, root); + if (ret) + return ret; + + pc->pass_seq++; + ret = nv_pass_lower_mods(&pass, root); + if (ret) + return ret; + + pc->pass_seq++; + ret = nvc0_pass_fold_loads(&pass, root); + if (ret) + return ret; + + if (pc->opt_reload_elim) { + reldelim = CALLOC_STRUCT(pass_reld_elim); + reldelim->pc = pc; + + pc->pass_seq++; + ret = nv_pass_reload_elim(reldelim, root); + if (ret) { + FREE(reldelim); + return ret; + } + memset(reldelim, 0, sizeof(struct pass_reld_elim)); + reldelim->pc = pc; + } + + /* May run DCE before load-combining since that pass will clean up + * after itself. 
+ */ + dce.pc = pc; + do { + dce.removed = 0; + pc->pass_seq++; + ret = nv_pass_dce(&dce, root); + if (ret) + return ret; + } while (dce.removed); + + if (pc->opt_reload_elim) { + pc->pass_seq++; + ret = nv_pass_mem_opt(reldelim, root); + if (!ret) { + memset(reldelim, 0, sizeof(struct pass_reld_elim)); + reldelim->pc = pc; + + pc->pass_seq++; + ret = nv_pass_mem_opt(reldelim, root); + } + FREE(reldelim); + if (ret) + return ret; + } + + ret = nv_pass_tex_mask(&pass, root); + if (ret) + return ret; + + pc->pass_seq++; + ret = nv_pass_fix_bind(&pass, root); + + return ret; +} + +int +nvc0_pc_exec_pass0(struct nv_pc *pc) +{ + int i, ret; + + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i]))) + return ret; + return 0; +} diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c new file mode 100644 index 0000000000..1f37cb802d --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -0,0 +1,381 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_pc.h" + +#define PRINT(args...) debug_printf(args) + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#endif + +static const char *norm = "\x1b[00m"; +static const char *gree = "\x1b[32m"; +static const char *blue = "\x1b[34m"; +static const char *cyan = "\x1b[36m"; +static const char *yllw = "\x1b[33m"; +static const char *mgta = "\x1b[35m"; + +static const char *nv_cond_names[] = +{ + "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "", + "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "", + "o", "c", "a", "s" +}; + +static const char *nv_modifier_strings[] = +{ + "", + "neg", + "abs", + "neg abs", + "not", + "not neg" + "not abs", + "not neg abs", + "sat", + "BAD_MOD" +}; + +const char * +nvc0_opcode_name(uint opcode) +{ + return nvc0_op_info_table[MIN2(opcode, NV_OP_COUNT)].name; +} + +static INLINE const char * +nv_type_name(ubyte type, ubyte size) +{ + switch (type) { + case NV_TYPE_U16: return "u16"; + case NV_TYPE_S16: return "s16"; + case NV_TYPE_F32: return "f32"; + case NV_TYPE_U32: return "u32"; + case NV_TYPE_S32: return "s32"; + case NV_TYPE_P32: return "p32"; + case NV_TYPE_F64: return "f64"; + case NV_TYPE_ANY: + { + switch (size) { + case 1: return "b8"; + case 2: return "b16"; + case 4: return "b32"; + case 8: return "b64"; + case 12: return "b96"; + case 16: return "b128"; + default: + return "BAD_SIZE"; + } + } + default: + return "BAD_TYPE"; + } +} + +static INLINE const char * +nv_cond_name(ubyte cc) +{ + return nv_cond_names[MIN2(cc, 19)]; +} + +static INLINE const char * +nv_modifier_string(ubyte mod) +{ + return nv_modifier_strings[MIN2(mod, 9)]; +} + +static INLINE int +nv_value_id(struct nv_value *value) +{ + if (value->join->reg.id >= 0) + 
return value->join->reg.id; + return value->n; +} + +static INLINE boolean +nv_value_allocated(struct nv_value *value) +{ + return (value->reg.id >= 0) ? TRUE : FALSE; +} + +static INLINE void +nv_print_address(const char c, int buf, struct nv_value *a, int offset) +{ + const char ac = (a && nv_value_allocated(a)) ? '$' : '%'; + char sg; + + if (offset < 0) { + sg = '-'; + offset = -offset; + } else { + sg = '+'; + } + + if (buf >= 0) + PRINT(" %s%c%i[", cyan, c, buf); + else + PRINT(" %s%c[", cyan, c); + if (a) + PRINT("%s%ca%i%s%c", mgta, ac, nv_value_id(a), cyan, sg); + PRINT("%s0x%x%s]", yllw, offset, cyan); +} + +static INLINE void +nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type) +{ + char reg_pfx = nv_value_allocated(value->join) ? '$' : '%'; + + if (value->reg.file != NV_FILE_PRED) + PRINT(" %s%s", gree, nv_type_name(type, value->reg.size)); + + switch (value->reg.file) { + case NV_FILE_GPR: + PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value)); + if (value->reg.size == 8) + PRINT("d"); + if (value->reg.size == 16) + PRINT("q"); + break; + case NV_FILE_PRED: + PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value)); + break; + case NV_FILE_COND: + PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value)); + break; + case NV_FILE_MEM_L: + nv_print_address('l', -1, indir, value->reg.address); + break; + case NV_FILE_MEM_G: + nv_print_address('g', -1, indir, value->reg.address); + break; + case NV_FILE_MEM_A: + nv_print_address('a', -1, indir, value->reg.address); + break; + case NV_FILE_MEM_V: + nv_print_address('v', -1, indir, value->reg.address); + break; + case NV_FILE_IMM: + switch (type) { + case NV_TYPE_U16: + case NV_TYPE_S16: + PRINT(" %s0x%04x", yllw, value->reg.imm.u32); + break; + case NV_TYPE_F32: + PRINT(" %s%f", yllw, value->reg.imm.f32); + break; + case NV_TYPE_F64: + PRINT(" %s%f", yllw, value->reg.imm.f64); + break; + case NV_TYPE_U32: + case NV_TYPE_S32: + case NV_TYPE_P32: + case NV_TYPE_ANY: + PRINT(" %s0x%08x", yllw, 
value->reg.imm.u32); + break; + } + break; + default: + if (value->reg.file >= NV_FILE_MEM_C(0) && + value->reg.file <= NV_FILE_MEM_C(15)) + nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir, + value->reg.address); + else + NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value)); + break; + } +} + +static INLINE void +nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type) +{ + nv_print_value(ref->value, indir, type); +} + +void +nvc0_print_instruction(struct nv_instruction *i) +{ + int s; + + PRINT("%i: ", i->serial); + + if (i->predicate >= 0) { + PRINT("%s%s", gree, i->cc ? "fl" : "tr"); + nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8); + PRINT(" "); + } + + PRINT("%s", gree); + if (NV_BASEOP(i->opcode) == NV_OP_SET) + PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond)); + else + if (i->saturate) + PRINT("sat %s", nvc0_opcode_name(i->opcode)); + else + PRINT("%s", nvc0_opcode_name(i->opcode)); + + if (i->opcode == NV_OP_CVT) + nv_print_value(i->def[0], NULL, i->ext.cvt.d); + else + if (i->def[0]) + nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode)); + else + if (i->target) + PRINT(" %s(BB:%i)", yllw, i->target->id); + else + PRINT(" #"); + + for (s = 1; s < 4 && i->def[s]; ++s) + nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode)); + if (s > 1) + PRINT("%s ,", norm); + + for (s = 0; s < 6 && i->src[s]; ++s) { + ubyte type; + if (s == i->indirect || s == i->predicate) + continue; + if (i->opcode == NV_OP_CVT) + type = i->ext.cvt.s; + else + type = NV_OPTYPE(i->opcode); + + if (i->src[s]->mod) + PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod)); + + if (i->indirect >= 0 && + NV_IS_MEMORY_FILE(i->src[s]->value->reg.file)) + nv_print_ref(i->src[s], i->src[i->indirect]->value, type); + else + nv_print_ref(i->src[s], NULL, type); + } + PRINT(" %s\n", norm); +} + +#define NV_MOD_SGN_12 ((NV_MOD_ABS | NV_MOD_NEG) | ((NV_MOD_ABS | NV_MOD_NEG) << 4)) +#define NV_MOD_NEG_123 (NV_MOD_NEG | (NV_MOD_NEG << 4) | 
(NV_MOD_NEG << 8)) +#define NV_MOD_NEG_3 (NV_MOD_NEG << 8) + +#define NV_MOD_SGN NV_MOD_SGN_12 + +struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = +{ + { NV_OP_UNDEF, "undef", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, + { NV_OP_BIND, "bind", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, + { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, + { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, + { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, + { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, + { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, + { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, + { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_NEG_123, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_NEG_123, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_FLOOR, "floor", 
NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, + + { NV_OP_VFETCH, "vfetch", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_PFETCH, "pfetch", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EXPORT, "export", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_LINTERP, "linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EMIT, "emit", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_TEX, "tex", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXB, "texbias", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXL, "texlod", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 }, + + { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_DFDX, "dfdx", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_DFDY, "dfdy", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_KIL, "kil", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_BRA, "bra", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RET, "ret", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RET, "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_JOIN, "join", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_MAX, 
"max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, + { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, + { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET_F32_OR, "or set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SLCT, "slct", NV_TYPE_F32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_S32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_U32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 }, + + { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, + + { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, + + { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + + { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 } +}; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c new file mode 100644 
index 0000000000..f4afe083e2 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -0,0 +1,1051 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define NOUVEAU_DEBUG 1 + +/* #define NVC0_RA_DEBUG_LIVEI */ +/* #define NVC0_RA_DEBUG_LIVE_SETS */ +/* #define NVC0_RA_DEBUG_JOIN */ + +#include "nvc0_pc.h" +#include "util/u_simple_list.h" + +#define NVC0_NUM_REGISTER_FILES 3 + +/* @unit_shift: log2 of min allocation unit for register */ +struct register_set { + uint32_t bits[NVC0_NUM_REGISTER_FILES][2]; + uint32_t last[NVC0_NUM_REGISTER_FILES]; + int log2_unit[NVC0_NUM_REGISTER_FILES]; + struct nv_pc *pc; +}; + +/* aliasing is allowed */ +static void +intersect_register_sets(struct register_set *dst, + struct register_set *src1, struct register_set *src2) +{ + int i; + + for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { + dst->bits[i][0] = src1->bits[i][0] | src2->bits[i][0]; + dst->bits[i][1] = src1->bits[i][1] | src2->bits[i][1]; + } +} + +static void +mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask) +{ + int i; + + for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { + set->bits[i][0] = (set->bits[i][0] | mask) & umask; + set->bits[i][1] = (set->bits[i][1] | mask) & umask; + } +} + +struct nv_pc_pass { + struct nv_pc *pc; + struct nv_instruction **insns; + uint num_insns; + uint pass_seq; +}; + +static void +ranges_coalesce(struct nv_range *range) +{ + while (range->next && range->end >= range->next->bgn) { + struct nv_range *rnn = range->next->next; + assert(range->bgn <= range->next->bgn); + range->end = MAX2(range->end, range->next->end); + FREE(range->next); + range->next = rnn; + } +} + +static boolean +add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) +{ + struct nv_range *range, **nextp = &val->livei; + + if (bgn == end) /* [a, a) is invalid / empty */ + return TRUE; + + for (range = val->livei; range; range = range->next) { + if (end < range->bgn) + break; /* insert before */ + + if (bgn > range->end) { + nextp = &range->next; + continue; /* insert after */ + } + + /* overlap */ + if (bgn < range->bgn) { + range->bgn = bgn; + if (end > 
range->end) + range->end = end; + ranges_coalesce(range); + return TRUE; + } + if (end > range->end) { + range->end = end; + ranges_coalesce(range); + return TRUE; + } + assert(bgn >= range->bgn); + assert(end <= range->end); + return TRUE; + } + + if (!new_range) + new_range = CALLOC_STRUCT(nv_range); + + new_range->bgn = bgn; + new_range->end = end; + new_range->next = range; + *(nextp) = new_range; + return FALSE; +} + +static void +add_range(struct nv_value *val, struct nv_basic_block *b, int end) +{ + int bgn; + + if (!val->insn) /* ignore non-def values */ + return; + assert(b->entry->serial <= b->exit->serial); + assert(b->phi->serial <= end); + assert(b->exit->serial + 1 >= end); + + bgn = val->insn->serial; + if (bgn < b->entry->serial || bgn > b->exit->serial) + bgn = b->entry->serial; + + assert(bgn <= end); + + add_range_ex(val, bgn, end, NULL); +} + +#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI) +static void +livei_print(struct nv_value *a) +{ + struct nv_range *r = a->livei; + + debug_printf("livei %i: ", a->n); + while (r) { + debug_printf("[%i, %i) ", r->bgn, r->end); + r = r->next; + } + debug_printf("\n"); +} +#endif + +static void +livei_unify(struct nv_value *dst, struct nv_value *src) +{ + struct nv_range *range, *next; + + for (range = src->livei; range; range = next) { + next = range->next; + if (add_range_ex(dst, range->bgn, range->end, range)) + FREE(range); + } + src->livei = NULL; +} + +static void +livei_release(struct nv_value *val) +{ + struct nv_range *range, *next; + + for (range = val->livei; range; range = next) { + next = range->next; + FREE(range); + } +} + +static boolean +livei_have_overlap(struct nv_value *a, struct nv_value *b) +{ + struct nv_range *r_a, *r_b; + + for (r_a = a->livei; r_a; r_a = r_a->next) { + for (r_b = b->livei; r_b; r_b = r_b->next) { + if (r_b->bgn < r_a->end && + r_b->end > r_a->bgn) + return TRUE; + } + } + return FALSE; +} + +static int +livei_end(struct nv_value *a) +{ + struct 
nv_range *r = a->livei; + + assert(r); + while (r->next) + r = r->next; + return r->end; +} + +static boolean +livei_contains(struct nv_value *a, int pos) +{ + struct nv_range *r; + + for (r = a->livei; r && r->bgn <= pos; r = r->next) + if (r->end > pos) + return TRUE; + return FALSE; +} + +static boolean +reg_assign(struct register_set *set, struct nv_value **def, int n) +{ + int i, id, s, k; + uint32_t m; + int f = def[0]->reg.file; + + k = n; + if (k == 3) + k = 4; + s = (k * def[0]->reg.size) >> set->log2_unit[f]; + m = (1 << s) - 1; + + id = set->last[f]; + + for (i = 0; i * 32 < set->last[f]; ++i) { + if (set->bits[f][i] == 0xffffffff) + continue; + + for (id = 0; id < 32; id += s) + if (!(set->bits[f][i] & (m << id))) + break; + if (id < 32) + break; + } + if (i * 32 + id > set->last[f]) + return FALSE; + + set->bits[f][i] |= m << id; + + id += i * 32; + + set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1); + + for (i = 0; i < n; ++i) + if (def[i]->livei) + def[i]->reg.id = id++; + + return TRUE; +} + +static INLINE void +reg_occupy(struct register_set *set, struct nv_value *val) +{ + int id = val->reg.id, f = val->reg.file; + uint32_t m; + + if (id < 0) + return; + m = (1 << (val->reg.size >> set->log2_unit[f])) - 1; + + set->bits[f][id / 32] |= m << (id % 32); + + if (set->pc->max_reg[f] < id) + set->pc->max_reg[f] = id; +} + +static INLINE void +reg_release(struct register_set *set, struct nv_value *val) +{ + int id = val->reg.id, f = val->reg.file; + uint32_t m; + + if (id < 0) + return; + m = (1 << (val->reg.size >> set->log2_unit[f])) - 1; + + set->bits[f][id / 32] &= ~(m << (id % 32)); +} + +static INLINE boolean +join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) +{ + int i; + struct nv_value *val; + + if (a->reg.file != b->reg.file || a->reg.size != b->reg.size) + return FALSE; + + if (a->join->reg.id == b->join->reg.id) + return TRUE; + + /* either a or b or both have been assigned */ + + if (a->join->reg.id >= 0 
&& b->join->reg.id >= 0) + return FALSE; + else + if (b->join->reg.id >= 0) { + if (b->join->reg.id == 63) + return FALSE; + val = a; + a = b; + b = val; + } else + if (a->join->reg.id == 63) + return FALSE; + + for (i = 0; i < ctx->pc->num_values; ++i) { + val = &ctx->pc->values[i]; + + if (val->join->reg.id != a->join->reg.id) + continue; + if (val->join != a->join && livei_have_overlap(val->join, b->join)) + return FALSE; + } + return TRUE; +} + +static INLINE void +do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) +{ + int j; + struct nv_value *bjoin = b->join; + + if (b->join->reg.id >= 0) + a->join->reg.id = b->join->reg.id; + + livei_unify(a->join, b->join); + +#ifdef NVC0_RA_DEBUG_JOIN + debug_printf("joining %i to %i\n", b->n, a->n); +#endif + + /* make a->join the new representative */ + for (j = 0; j < ctx->pc->num_values; ++j) + if (ctx->pc->values[j].join == bjoin) + ctx->pc->values[j].join = a->join; + + assert(b->join == a->join); +} + +static INLINE boolean +try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) +{ + if (!join_allowed(ctx, a, b)) { +#ifdef NVC0_RA_DEBUG_JOIN + debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); +#endif + return FALSE; + } + if (livei_have_overlap(a->join, b->join)) { +#ifdef NVC0_RA_DEBUG_JOIN + debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n); + livei_print(a); + livei_print(b); +#endif + return FALSE; + } + + do_join_values(ctx, a, b); + + return TRUE; +} + +static void +join_values_nofail(struct nv_pc_pass *ctx, + struct nv_value *a, struct nv_value *b, boolean type_only) +{ + if (type_only) { + assert(join_allowed(ctx, a, b)); + do_join_values(ctx, a, b); + } else { + boolean ok = try_join_values(ctx, a, b); + if (!ok) { + NOUVEAU_ERR("failed to coalesce values\n"); + } + } +} + +static INLINE boolean +need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) +{ + int i = 0, n = 0; + + for (; i < 2; ++i) + if 
(p->out[i] && !IS_LOOP_EDGE(p->out_kind[i])) + ++n; + + return (b->num_in > 1) && (n == 2); +} + +/* Look for the @phi's operand whose definition reaches @b. */ +static int +phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, + struct nv_basic_block *tb) +{ + struct nv_ref *srci, *srcj; + int i, j; + + for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { + srci = phi->src[i]; + /* if already replaced, check with original source first */ + if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV) + srci = srci->value->insn->src[0]; + if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL)) + continue; + /* NOTE: back-edges are ignored by the reachable-by check */ + if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb, + srci->value->insn->bb, NULL)) { + j = i; + srcj = srci; + } + } + if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL)) + if (!nvc0_bblock_reachable_by(srcj->value->insn->bb, + phi->def[0]->insn->bb, NULL)) + j = -1; + return j; +} + +/* For each operand of each PHI in b, generate a new value by inserting a MOV + * at the end of the block it is coming from and replace the operand with its + * result. This eliminates liveness conflicts and enables us to let values be + * copied to the right register if such a conflict exists nonetheless. + * + * These MOVs are also crucial in making sure the live intervals of phi srces + * are extended until the end of the loop, since they are not included in the + * live-in sets. 
+ */ +static int +pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *i, *ni; + struct nv_value *val; + struct nv_basic_block *p, *pn; + int n, j; + + b->pass_seq = ctx->pc->pass_seq; + + for (n = 0; n < b->num_in; ++n) { + p = pn = b->in[n]; + assert(p); + + if (need_new_else_block(b, p)) { + pn = new_basic_block(ctx->pc); + + if (p->out[0] == b) + p->out[0] = pn; + else + p->out[1] = pn; + + if (p->exit->target == b) /* target to new else-block */ + p->exit->target = pn; + + b->in[n] = pn; + + pn->out[0] = b; + pn->in[0] = p; + pn->num_in = 1; + } + ctx->pc->current_block = pn; + + for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { + j = phi_opnd_for_bb(i, p, b); + + if (j < 0) { + val = i->def[0]; + } else { + val = i->src[j]->value; + if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) { + j = -1; + /* use original value, we already encountered & replaced it */ + val = val->insn->src[0]->value; + } + } + if (j < 0) /* need an additional source ? */ + for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j); + assert(j < 6); /* XXX: really ugly shaders */ + + ni = new_instruction(ctx->pc, NV_OP_MOV); + if (ni->prev && ni->prev->target) + nvc0_insns_permute(ni->prev, ni); + + ni->def[0] = new_value_like(ctx->pc, val); + ni->def[0]->insn = ni; + nv_reference(ctx->pc, ni, 0, val); + nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */ + i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV; + } + + if (pn != p && pn->exit) { + ctx->pc->current_block = b->in[n ? 
0 : 1]; + ni = new_instruction(ctx->pc, NV_OP_BRA); + ni->target = b; + ni->terminator = 1; + } + } + + for (j = 0; j < 2; ++j) + if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) + pass_generate_phi_movs(ctx, b->out[j]); + + return 0; +} + +#define JOIN_MASK_PHI (1 << 0) +#define JOIN_MASK_SELECT (1 << 1) +#define JOIN_MASK_MOV (1 << 2) +#define JOIN_MASK_BIND (1 << 3) + +static int +pass_join_values(struct nv_pc_pass *ctx, unsigned mask) +{ + int c, n; + + for (n = 0; n < ctx->num_insns; ++n) { + struct nv_instruction *i = ctx->insns[n]; + + switch (i->opcode) { + case NV_OP_PHI: + if (!(mask & JOIN_MASK_PHI)) + break; + for (c = 0; c < 6 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE); + break; + case NV_OP_MOV: + if (!(mask & JOIN_MASK_MOV)) + break; + if (i->src[0]->value->insn && !i->src[0]->value->insn->def[1]) + try_join_values(ctx, i->def[0], i->src[0]->value); + break; + case NV_OP_SELECT: + if (!(mask & JOIN_MASK_SELECT)) + break; + for (c = 0; c < 6 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE); + break; + case NV_OP_BIND: + if (!(mask & JOIN_MASK_BIND)) + break; + for (c = 0; c < 4 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE); + break; + case NV_OP_TEX: + case NV_OP_TXB: + case NV_OP_TXL: + case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */ + default: + break; + } + } + return 0; +} + +/* Order the instructions so that live intervals can be expressed in numbers. 
*/ +static void +pass_order_instructions(void *priv, struct nv_basic_block *b) +{ + struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv; + struct nv_instruction *i; + + b->pass_seq = ctx->pc->pass_seq; + + assert(!b->exit || !b->exit->next); + for (i = b->phi; i; i = i->next) { + i->serial = ctx->num_insns; + ctx->insns[ctx->num_insns++] = i; + } +} + +static void +bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b) +{ +#ifdef NVC0_RA_DEBUG_LIVE_SETS + struct nv_value *val; + int j; + + debug_printf("LIVE-INs of BB:%i: ", b->id); + + for (j = 0; j < pc->num_values; ++j) { + if (!(b->live_set[j / 32] & (1 << (j % 32)))) + continue; + val = &pc->values[j]; + if (!val->insn) + continue; + debug_printf("%i ", val->n); + } + debug_printf("\n"); +#endif +} + +static INLINE void +live_set_add(struct nv_basic_block *b, struct nv_value *val) +{ + if (!val->insn) /* don't add non-def values */ + return; + b->live_set[val->n / 32] |= 1 << (val->n % 32); +} + +static INLINE void +live_set_rem(struct nv_basic_block *b, struct nv_value *val) +{ + b->live_set[val->n / 32] &= ~(1 << (val->n % 32)); +} + +static INLINE boolean +live_set_test(struct nv_basic_block *b, struct nv_ref *ref) +{ + int n = ref->value->n; + return b->live_set[n / 32] & (1 << (n % 32)); +} + +/* The live set of a block contains those values that are live immediately + * before the beginning of the block, so do a backwards scan. 
+ */ +static int +pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *i; + int j, n, ret = 0; + + if (b->pass_seq >= ctx->pc->pass_seq) + return 0; + b->pass_seq = ctx->pc->pass_seq; + + /* slight hack for undecidedness: set phi = entry if it's undefined */ + if (!b->phi) + b->phi = b->entry; + + for (n = 0; n < 2; ++n) { + if (!b->out[n] || b->out[n] == b) + continue; + ret = pass_build_live_sets(ctx, b->out[n]); + if (ret) + return ret; + + if (n == 0) { + for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j) + b->live_set[j] = b->out[n]->live_set[j]; + } else { + for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j) + b->live_set[j] |= b->out[n]->live_set[j]; + } + } + + if (!b->entry) + return 0; + + bb_live_set_print(ctx->pc, b); + + for (i = b->exit; i != b->entry->prev; i = i->prev) { + for (j = 0; j < 5 && i->def[j]; j++) + live_set_rem(b, i->def[j]); + for (j = 0; j < 6 && i->src[j]; j++) + live_set_add(b, i->src[j]->value); + } + for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) + live_set_rem(b, i->def[0]); + + bb_live_set_print(ctx->pc, b); + + return 0; +} + +static void collect_live_values(struct nv_basic_block *b, const int n) +{ + int i; + + /* XXX: what to do about back/fake-edges (used to include both here) ? */ + if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { + for (i = 0; i < n; ++i) + b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; + } else { + memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); + } + } else + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { + memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); + } else { + memset(b->live_set, 0, n * sizeof(uint32_t)); + } +} + +/* NOTE: the live intervals of phi functions start at the first non-phi insn. 
*/ +static int +pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *i, *i_stop; + int j, s; + const int n = (ctx->pc->num_values + 31) / 32; + + /* verify that first block does not have live-in values */ + if (b->num_in == 0) + for (j = 0; j < n; ++j) + assert(b->live_set[j] == 0); + + collect_live_values(b, n); + + /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */ + for (j = 0; j < 2; ++j) { + if (!b->out[j] || !b->out[j]->phi) + continue; + for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) { + live_set_rem(b, i->def[0]); + + for (s = 0; s < 6 && i->src[s]; ++s) { + assert(i->src[s]->value->insn); + if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb, + b->out[j])) + live_set_add(b, i->src[s]->value); + else + live_set_rem(b, i->src[s]->value); + } + } + } + + /* remaining live-outs are live until the end */ + if (b->exit) { + for (j = 0; j < ctx->pc->num_values; ++j) { + if (!(b->live_set[j / 32] & (1 << (j % 32)))) + continue; + add_range(&ctx->pc->values[j], b, b->exit->serial + 1); +#ifdef NVC0_RA_DEBUG_LIVEI + debug_printf("adding range for live value %i: ", j); + livei_print(&ctx->pc->values[j]); +#endif + } + } + + i_stop = b->entry ? 
b->entry->prev : NULL; + + /* don't have to include phi functions here (will have 0 live range) */ + for (i = b->exit; i != i_stop; i = i->prev) { + assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial); + for (j = 0; j < 4 && i->def[j]; ++j) + live_set_rem(b, i->def[j]); + + for (j = 0; j < 6 && i->src[j]; ++j) { + if (!live_set_test(b, i->src[j])) { + live_set_add(b, i->src[j]->value); + add_range(i->src[j]->value, b, i->serial); +#ifdef NVC0_RA_DEBUG_LIVEI + debug_printf("adding range for source %i (ends living): ", + i->src[j]->value->n); + livei_print(i->src[j]->value); +#endif + } + } + } + + b->pass_seq = ctx->pc->pass_seq; + + if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq) + pass_build_intervals(ctx, b->out[0]); + + if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq) + pass_build_intervals(ctx, b->out[1]); + + return 0; +} + +static INLINE void +nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set) +{ + memset(set, 0, sizeof(*set)); + + set->last[NV_FILE_GPR] = 62; + set->last[NV_FILE_PRED] = 6; + set->last[NV_FILE_COND] = 1; + + set->log2_unit[NV_FILE_GPR] = 2; + set->log2_unit[NV_FILE_COND] = 0; + set->log2_unit[NV_FILE_PRED] = 0; + + set->pc = pc; +} + +static void +insert_ordered_tail(struct nv_value *list, struct nv_value *nval) +{ + struct nv_value *elem; + + for (elem = list->prev; + elem != list && elem->livei->bgn > nval->livei->bgn; + elem = elem->prev); + /* now elem begins before or at the same time as val */ + + nval->prev = elem; + nval->next = elem->next; + elem->next->prev = nval; + elem->next = nval; +} + +static void +collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head, + boolean assigned_only) +{ + struct nv_value *val; + int k, n; + + make_empty_list(head); + + for (n = 0; n < ctx->num_insns; ++n) { + struct nv_instruction *i = ctx->insns[n]; + + /* for joined values, only the representative will have livei != NULL */ + for (k = 0; k < 5; ++k) { + if (i->def[k] && 
i->def[k]->livei) + if (!assigned_only || i->def[k]->reg.id >= 0) + insert_ordered_tail(head, i->def[k]); + } + } + + for (val = head->next; val != head->prev; val = val->next) { + assert(val->join == val); + assert(val->livei->bgn <= val->next->livei->bgn); + } +} + +static int +pass_linear_scan(struct nv_pc_pass *ctx) +{ + struct register_set f, free; + struct nv_value *cur, *val, *tmp[2]; + struct nv_value active, inactive, handled, unhandled; + + make_empty_list(&active); + make_empty_list(&inactive); + make_empty_list(&handled); + + nvc0_ctor_register_set(ctx->pc, &free); + + collect_register_values(ctx, &unhandled, FALSE); + + foreach_s(cur, tmp[0], &unhandled) { + remove_from_list(cur); + + foreach_s(val, tmp[1], &active) { + if (livei_end(val) <= cur->livei->bgn) { + reg_release(&free, val); + move_to_head(&handled, val); + } else + if (!livei_contains(val, cur->livei->bgn)) { + reg_release(&free, val); + move_to_head(&inactive, val); + } + } + + foreach_s(val, tmp[1], &inactive) { + if (livei_end(val) <= cur->livei->bgn) + move_to_head(&handled, val); + else + if (livei_contains(val, cur->livei->bgn)) { + reg_occupy(&free, val); + move_to_head(&active, val); + } + } + + f = free; + + foreach(val, &inactive) + if (livei_have_overlap(val, cur)) + reg_occupy(&f, val); + + foreach(val, &unhandled) + if (val->reg.id >= 0 && livei_have_overlap(val, cur)) + reg_occupy(&f, val); + + if (cur->reg.id < 0) { + boolean mem = !reg_assign(&f, &cur, 1); + + if (mem) { + NOUVEAU_ERR("out of registers\n"); + abort(); + } + } + insert_at_head(&active, cur); + reg_occupy(&free, cur); + } + + return 0; +} + +/* Allocate values defined by instructions such as TEX, which have to be + * assigned to consecutive registers. + * Linear scan doesn't really work here since the values can have different + * live intervals. 
+ */ +static int +pass_allocate_constrained_values(struct nv_pc_pass *ctx) +{ + struct nv_value regvals, *val; + struct nv_instruction *i; + struct nv_value *defs[4]; + struct register_set regs[4]; + int n, vsize, c; + uint32_t mask; + boolean mem; + + collect_register_values(ctx, ®vals, TRUE); + + for (n = 0; n < ctx->num_insns; ++n) { + i = ctx->insns[n]; + vsize = nvi_vector_size(i); + if (!(vsize > 1)) + continue; + assert(vsize <= 4); + + for (c = 0; c < vsize; ++c) + defs[c] = i->def[c]->join; + + if (defs[0]->reg.id >= 0) { + for (c = 1; c < vsize; ++c) + assert(defs[c]->reg.id >= 0); + continue; + } + + for (c = 0; c < vsize; ++c) { + nvc0_ctor_register_set(ctx->pc, ®s[c]); + + foreach(val, ®vals) { + if (val->reg.id >= 0 && livei_have_overlap(val, defs[c])) + reg_occupy(®s[c], val); + } + mask = 0x11111111; + if (vsize == 2) /* granularity is 2 and not 4 */ + mask |= 0x11111111 << 2; + mask_register_set(®s[c], 0, mask << c); + + if (defs[c]->livei) + insert_ordered_tail(®vals, defs[c]); + } + for (c = 1; c < vsize; ++c) + intersect_register_sets(®s[0], ®s[0], ®s[c]); + + mem = !reg_assign(®s[0], &defs[0], vsize); + + if (mem) { + NOUVEAU_ERR("out of registers\n"); + abort(); + } + } + return 0; +} + +static int +nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) +{ + struct nv_pc_pass *ctx; + int i, ret; + + NOUVEAU_DBG("REGISTER ALLOCATION - entering\n"); + + ctx = CALLOC_STRUCT(nv_pc_pass); + if (!ctx) + return -1; + ctx->pc = pc; + + ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *)); + if (!ctx->insns) { + FREE(ctx); + return -1; + } + + pc->pass_seq++; + ret = pass_generate_phi_movs(ctx, root); + assert(!ret); + +#ifdef NVC0_RA_DEBUG_LIVEI + nvc0_print_function(root); +#endif + + for (i = 0; i < pc->loop_nesting_bound; ++i) { + pc->pass_seq++; + ret = pass_build_live_sets(ctx, root); + assert(!ret && "live sets"); + if (ret) { + NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i); + goto out; + } + } + + 
pc->pass_seq++; + nvc0_pc_pass_in_order(root, pass_order_instructions, ctx); + + pc->pass_seq++; + ret = pass_build_intervals(ctx, root); + assert(!ret && "build intervals"); + if (ret) { + NOUVEAU_ERR("failed to build live intervals\n"); + goto out; + } + +#ifdef NVC0_RA_DEBUG_LIVEI + for (i = 0; i < pc->num_values; ++i) + livei_print(&pc->values[i]); +#endif + + ret = pass_join_values(ctx, JOIN_MASK_PHI); + if (ret) + goto out; + ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_BIND); + if (ret) + goto out; + ret = pass_join_values(ctx, JOIN_MASK_MOV); + if (ret) + goto out; + ret = pass_allocate_constrained_values(ctx); + if (ret) + goto out; + ret = pass_linear_scan(ctx); + if (ret) + goto out; + + for (i = 0; i < pc->num_values; ++i) + livei_release(&pc->values[i]); + + NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n"); + +out: + FREE(ctx->insns); + FREE(ctx); + return ret; +} + +int +nvc0_pc_exec_pass1(struct nv_pc *pc) +{ + int i, ret; + + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i]))) + return ret; + return 0; +} diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c new file mode 100644 index 0000000000..3c59213176 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -0,0 +1,728 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_shader_tokens.h" +#include "pipe/p_defines.h" + +#define NOUVEAU_DEBUG + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" + +#include "nvc0_context.h" +#include "nvc0_pc.h" + +static unsigned +nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) +{ + unsigned mask = inst->Dst[0].Register.WriteMask; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); + case TGSI_OPCODE_DP3: + return 0x7; + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_KIL: /* WriteMask ignored */ + return 0xf; + case TGSI_OPCODE_DST: + return mask & (c ? 
0xa : 0x6); + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_LOG: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SCS: + return 0x1; + case TGSI_OPCODE_IF: + return 0x1; + case TGSI_OPCODE_LIT: + return 0xb; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + { + const struct tgsi_instruction_texture *tex; + + assert(inst->Instruction.Texture); + tex = &inst->Texture; + + mask = 0x7; + if (inst->Instruction.Opcode != TGSI_OPCODE_TEX && + inst->Instruction.Opcode != TGSI_OPCODE_TXD) + mask |= 0x8; /* bias, lod or proj */ + + switch (tex->Texture) { + case TGSI_TEXTURE_1D: + mask &= 0x9; + break; + case TGSI_TEXTURE_SHADOW1D: + mask &= 0x5; + break; + case TGSI_TEXTURE_2D: + mask &= 0xb; + break; + default: + break; + } + } + return mask; + case TGSI_OPCODE_XPD: + { + unsigned x = 0; + if (mask & 1) x |= 0x6; + if (mask & 2) x |= 0x5; + if (mask & 4) x |= 0x3; + return x; + } + default: + break; + } + + return mask; +} + +static void +nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id) +{ + int i, c; + + for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) + for (c = 0; c < 4; ++c) + ti->input_access[i][c] = id; + + ti->indirect_inputs = TRUE; +} + +static void +nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id) +{ + int i, c; + + for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) + for (c = 0; c < 4; ++c) + ti->output_access[i][c] = id; + + ti->indirect_outputs = TRUE; +} + +static INLINE unsigned +nvc0_system_value_location(unsigned sn, unsigned si, boolean *is_input) +{ + /* NOTE: locations 0xfxx indicate special regs */ + switch (sn) { + /* + case TGSI_SEMANTIC_VERTEXID: + *is_input = TRUE; + return 0x2fc; + */ + case TGSI_SEMANTIC_PRIMID: + *is_input = TRUE; + return 0x60; + /* + case TGSI_SEMANTIC_LAYER_INDEX: + return 0x64; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + return 0x68; + */ + case TGSI_SEMANTIC_INSTANCEID: + 
*is_input = TRUE; + return 0x2f8; + case TGSI_SEMANTIC_FACE: + *is_input = TRUE; + return 0x3fc; + /* + case TGSI_SEMANTIC_INVOCATIONID: + return 0xf11; + */ + default: + assert(0); + return 0x000; + } +} + +static INLINE unsigned +nvc0_varying_location(unsigned sn, unsigned si) +{ + switch (sn) { + case TGSI_SEMANTIC_POSITION: + return 0x70; + case TGSI_SEMANTIC_COLOR: + return 0x280 + (si * 16); /* are these hard-wired ? */ + case TGSI_SEMANTIC_BCOLOR: + return 0x2a0 + (si * 16); + case TGSI_SEMANTIC_FOG: + return 0x270; + case TGSI_SEMANTIC_PSIZE: + return 0x6c; + /* + case TGSI_SEMANTIC_PNTC: + return 0x2e0; + */ + case TGSI_SEMANTIC_GENERIC: + /* We'd really like to distinguish between TEXCOORD and GENERIC here, + * since only 0x300 to 0x37c can be replaced by sprite coordinates. + * Also, gl_PointCoord should be a system value and must be assigned to + * address 0x2e0. For now, let's cheat: + */ + assert(si < 31); + if (si <= 7) + return 0x300 + si * 16; + if (si == 9) + return 0x2e0; + return 0x80 + ((si - 8) * 16); + case TGSI_SEMANTIC_NORMAL: + return 0x360; + case TGSI_SEMANTIC_PRIMID: + return 0x40; + case TGSI_SEMANTIC_FACE: + return 0x3fc; + case TGSI_SEMANTIC_EDGEFLAG: /* doesn't exist, set value like for an sreg */ + return 0xf00; + /* + case TGSI_SEMANTIC_CLIP_DISTANCE: + return 0x2c0 + (si * 4); + */ + default: + assert(0); + return 0x000; + } +} + +static INLINE unsigned +nvc0_interp_mode(const struct tgsi_full_declaration *decl) +{ + unsigned mode; + + if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT) + mode = NVC0_INTERP_FLAT; + else + if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + mode = NVC0_INTERP_PERSPECTIVE; + else + mode = NVC0_INTERP_LINEAR; + + if (decl->Declaration.Centroid) + mode |= NVC0_INTERP_CENTROID; + + return mode; +} + +static void +prog_immediate(struct nvc0_translation_info *ti, + const struct tgsi_full_immediate *imm) +{ + int c; + unsigned n = ti->immd32_nr++; + + assert(ti->immd32_nr <= 
ti->scan.immediate_count); + + for (c = 0; c < 4; ++c) + ti->immd32[n * 4 + c] = imm->u[c].Uint; + + ti->immd32_ty[n] = imm->Immediate.DataType; +} + +static boolean +prog_decl(struct nvc0_translation_info *ti, + const struct tgsi_full_declaration *decl) +{ + unsigned i, c; + unsigned sn = TGSI_SEMANTIC_GENERIC; + unsigned si = 0; + const unsigned first = decl->Range.First; + const unsigned last = decl->Range.Last; + + if (decl->Declaration.Semantic) { + sn = decl->Semantic.Name; + si = decl->Semantic.Index; + } + + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + for (i = first; i <= last; ++i) { + if (ti->prog->type == PIPE_SHADER_VERTEX) { + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = 0x80 + i * 16 + c * 4; + } else { + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + /* for sprite coordinates: */ + ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4; + } + if (ti->prog->type == PIPE_SHADER_FRAGMENT) + ti->interp_mode[i] = nvc0_interp_mode(decl); + } + break; + case TGSI_FILE_OUTPUT: + for (i = first; i <= last; ++i, ++si) { + if (ti->prog->type == PIPE_SHADER_FRAGMENT) { + si = i; + if (i == ti->fp_depth_output) { + ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4; + } else { + if (i > ti->fp_depth_output) + si -= 1; + for (c = 0; c < 4; ++c) + ti->output_loc[i][c] = si * 4 + c; + } + } else { + if (sn == TGSI_SEMANTIC_EDGEFLAG) + ti->edgeflag_out = i; + for (c = 0; c < 4; ++c) + ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + /* for TFB_VARYING_LOCS: */ + ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4; + } + } + break; + case TGSI_FILE_SYSTEM_VALUE: + i = first; + ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]); + assert(first == last); + break; + case TGSI_FILE_TEMPORARY: + ti->temp128_nr = MAX2(ti->temp128_nr, last + 1); + break; + case TGSI_FILE_NULL: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_SAMPLER: + case TGSI_FILE_ADDRESS: + case TGSI_FILE_IMMEDIATE: 
+ case TGSI_FILE_PREDICATE: + break; + default: + NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File); + return FALSE; + } + return TRUE; +} + +static void +prog_inst(struct nvc0_translation_info *ti, + const struct tgsi_full_instruction *inst, int id) +{ + const struct tgsi_dst_register *dst; + const struct tgsi_src_register *src; + int s, c, k; + unsigned mask; + + if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { + ti->subr[ti->num_subrs].first_insn = id - 1; + ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */ + ++ti->num_subrs; + } + + if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { + dst = &inst->Dst[0].Register; + + for (c = 0; c < 4; ++c) { + if (dst->Indirect) + nvc0_indirect_outputs(ti, id); + if (!(dst->WriteMask & (1 << c))) + continue; + ti->output_access[dst->Index][c] = id; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_MOV && + inst->Src[0].Register.File == TGSI_FILE_INPUT && + dst->Index == ti->edgeflag_out) + ti->prog->vp.edgeflag = inst->Src[0].Register.Index; + } else + if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { + if (inst->Dst[0].Register.Indirect) + ti->require_stores = TRUE; + } + + for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { + src = &inst->Src[s].Register; + if (src->File == TGSI_FILE_TEMPORARY) + if (inst->Src[s].Register.Indirect) + ti->require_stores = TRUE; + if (src->File != TGSI_FILE_INPUT) + continue; + mask = nvc0_tgsi_src_mask(inst, s); + + if (inst->Src[s].Register.Indirect) + nvc0_indirect_inputs(ti, id); + + for (c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) + continue; + k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); + if (k <= TGSI_SWIZZLE_W) + ti->input_access[src->Index][k] = id; + } + } +} + +/* Probably should introduce something like struct tgsi_function_declaration + * instead of trying to guess inputs/outputs. 
+ */ +static void +prog_subroutine_inst(struct nvc0_subroutine *subr, + const struct tgsi_full_instruction *inst) +{ + const struct tgsi_dst_register *dst; + const struct tgsi_src_register *src; + int s, c, k; + unsigned mask; + + for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { + src = &inst->Src[s].Register; + if (src->File != TGSI_FILE_TEMPORARY) + continue; + mask = nvc0_tgsi_src_mask(inst, s); + + for (c = 0; c < 4; ++c) { + k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); + + if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W) + if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32)))) + subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32); + } + } + + if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { + dst = &inst->Dst[0].Register; + + for (c = 0; c < 4; ++c) + if (dst->WriteMask & (1 << c)) + subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32); + } +} + +static int +nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) +{ + int i, c; + unsigned a; + + for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { + for (c = 0; c < 4; ++c, ++a) + if (ti->input_access[i][c]) + vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */ + } + + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { + a = (ti->output_loc[i][0] - 0x40) / 4; + if (ti->output_loc[i][0] >= 0xf00) + continue; + for (c = 0; c < 4; ++c, ++a) { + if (!ti->output_access[i][c]) + continue; + vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */ + } + } + + for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) { + a = ti->sysval_loc[i] / 4; + if (a > 0 && a < (0xf00 / 4)) + vp->hdr[(ti->sysval_in[i] ? 
5 : 13) + a / 32] |= 1 << (a % 32); + } + + return 0; +} + +static int +nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) +{ + vp->hdr[0] = 0x20461; + vp->hdr[4] = 0xff000; + + vp->hdr[18] = (1 << vp->vp.num_ucps) - 1; + + return nvc0_vp_gp_gen_header(vp, ti); +} + +static int +nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti) +{ + unsigned invocations = 1; + unsigned max_output_verts, output_prim; + unsigned i; + + gp->hdr[0] = 0x21061; + + for (i = 0; i < ti->scan.num_properties; ++i) { + switch (ti->scan.properties[i].name) { + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + output_prim = ti->scan.properties[i].data[0]; + break; + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: + max_output_verts = ti->scan.properties[i].data[0]; + assert(max_output_verts < 512); + break; + /* + case TGSI_PROPERTY_GS_INVOCATIONS: + invocations = ti->scan.properties[i].data[0]; + assert(invocations <= 32); + break; + */ + default: + break; + } + } + + gp->hdr[2] = MIN2(invocations, 32) << 24; + + switch (output_prim) { + case PIPE_PRIM_POINTS: + gp->hdr[3] = 0x01000000; + gp->hdr[0] |= 0xf0000000; + break; + case PIPE_PRIM_LINE_STRIP: + gp->hdr[3] = 0x06000000; + gp->hdr[0] |= 0x10000000; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + gp->hdr[3] = 0x07000000; + gp->hdr[0] |= 0x10000000; + break; + default: + assert(0); + break; + } + + gp->hdr[4] = max_output_verts & 0x1ff; + + return nvc0_vp_gp_gen_header(gp, ti); +} + +static int +nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) +{ + int i, c; + unsigned a, m; + + fp->hdr[0] = 0x21462; + fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ + + if (ti->scan.uses_kill) + fp->hdr[0] |= 0x8000; + if (ti->scan.writes_z) { + fp->hdr[19] |= 0x2; + if (ti->scan.num_outputs > 2) + fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ + } else { + if (ti->scan.num_outputs > 1) + fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ + } + + for (i = 0; i 
<= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { + m = ti->interp_mode[i] & 3; + for (c = 0; c < 4; ++c) { + if (!ti->input_access[i][c]) + continue; + a = ti->input_loc[i][c] / 2; + if (ti->input_loc[i][c] >= 0x2c0) + a -= 32; + if (ti->input_loc[i][0] == 0x70) + fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */ + else + if (ti->input_loc[i][0] == 0x2e0) + fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */ + else + fp->hdr[4 + a / 32] |= m << (a % 32); + } + } + + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { + if (i != ti->fp_depth_output) + fp->hdr[18] |= 0xf << ti->output_loc[i][0]; + } + + for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) { + a = ti->sysval_loc[i] / 2; + if ((a > 0) && (a < 0xf00 / 2)) + fp->hdr[4 + a / 32] |= NVC0_INTERP_FLAT << (a % 32); + } + + return 0; +} + +static boolean +nvc0_prog_scan(struct nvc0_translation_info *ti) +{ + struct nvc0_program *prog = ti->prog; + struct tgsi_parse_context parse; + int ret; + unsigned i; + +#ifdef NOUVEAU_DEBUG + tgsi_dump(prog->pipe.tokens, 0); +#endif + + tgsi_scan_shader(prog->pipe.tokens, &ti->scan); + + if (ti->prog->type == PIPE_SHADER_FRAGMENT) { + ti->fp_depth_output = 255; + for (i = 0; i < ti->scan.num_outputs; ++i) + if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION) + ti->fp_depth_output = i; + } + + ti->subr = + CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0])); + + ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16); + ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte)); + + ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0])); + + tgsi_parse_init(&parse, prog->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + prog_immediate(ti, &parse.FullToken.FullImmediate); + break; + case TGSI_TOKEN_TYPE_DECLARATION: + prog_decl(ti, &parse.FullToken.FullDeclaration); + break; + case 
TGSI_TOKEN_TYPE_INSTRUCTION: + ti->insns[ti->num_insns] = parse.FullToken.FullInstruction; + prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns); + break; + default: + break; + } + } + + for (i = 0; i < ti->num_subrs; ++i) { + unsigned pc = ti->subr[i].id; + while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB) + prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]); + } + + switch (prog->type) { + case PIPE_SHADER_VERTEX: + ti->input_file = NV_FILE_MEM_A; + ti->output_file = NV_FILE_MEM_V; + ret = nvc0_vp_gen_header(prog, ti); + break; + /* + case PIPE_SHADER_TESSELLATION_CONTROL: + ret = nvc0_tcp_gen_header(ti); + break; + case PIPE_SHADER_TESSELLATION_EVALUATION: + ret = nvc0_tep_gen_header(ti); + break; + case PIPE_SHADER_GEOMETRY: + ret = nvc0_gp_gen_header(ti); + break; + */ + case PIPE_SHADER_FRAGMENT: + ti->input_file = NV_FILE_MEM_V; + ti->output_file = NV_FILE_GPR; + + if (ti->scan.writes_z) + prog->flags[0] = 0x11; /* ? */ + else + if (!ti->scan.uses_kill && !ti->global_stores) + prog->fp.early_z = 1; + + ret = nvc0_fp_gen_header(prog, ti); + break; + default: + assert(!"unsupported program type"); + ret = -1; + break; + } + + if (ti->require_stores) { + prog->hdr[0] |= 1 << 26; + prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */ + } + + assert(!ret); + return ret; +} + +boolean +nvc0_program_translate(struct nvc0_program *prog) +{ + struct nvc0_translation_info *ti; + int ret; + + ti = CALLOC_STRUCT(nvc0_translation_info); + ti->prog = prog; + + ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; + + prog->vp.edgeflag = PIPE_MAX_ATTRIBS; + + if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps) + ti->append_ucp = TRUE; + + ret = nvc0_prog_scan(ti); + if (ret) { + NOUVEAU_ERR("unsupported shader program\n"); + goto out; + } + + ret = nvc0_generate_code(ti); + if (ret) + NOUVEAU_ERR("shader translation failed\n"); + + { + unsigned i; + for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i) + debug_printf("HDR[%02lx] = 
0x%08x\n", + /* %lx expects unsigned long; size_t is not that type on every ABI */ + (unsigned long)(i * sizeof(prog->hdr[0])), prog->hdr[i]); + } + +out: + if (ti->immd32) + FREE(ti->immd32); + if (ti->immd32_ty) + FREE(ti->immd32_ty); + if (ti->insns) + FREE(ti->insns); + if (ti->subr) + FREE(ti->subr); + FREE(ti); + return ret ? FALSE : TRUE; +} + +/** + * Release what a translated program owns: its nouveau_resource, the code + * array and the relocation array. The header words are zeroed and the + * program is marked as not translated. + * + * prog itself is not freed. The nvc0 argument is not used. + */ +void +nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + if (prog->res) + nouveau_resource_free(&prog->res); + + if (prog->code) + FREE(prog->code); + if (prog->relocs) + FREE(prog->relocs); + + /* prog stays alive and is only marked untranslated below, so clear the + * freed pointers to keep a later destroy from freeing them a second time. + */ + prog->code = NULL; + prog->relocs = NULL; + + memset(prog->hdr, 0, sizeof(prog->hdr)); + + prog->translated = FALSE; +} diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h new file mode 100644 index 0000000000..f6fea29780 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -0,0 +1,92 @@ + +#ifndef __NVC0_PROGRAM_H__ +#define __NVC0_PROGRAM_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +#define NVC0_CAP_MAX_PROGRAM_TEMPS 64 + +#define NVC0_SHADER_HEADER_SIZE (20 * 4) + +struct nvc0_program { + struct pipe_shader_state pipe; + + ubyte type; + boolean translated; + ubyte max_gpr; + + uint32_t *code; + unsigned code_base; + unsigned code_size; + unsigned parm_size; + + uint32_t hdr[20]; /* TODO: move this into code to save space */ + + uint32_t flags[2]; + + struct { + uint8_t edgeflag; + uint8_t num_ucps; + uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS]; + } vp; + struct { + uint8_t early_z; + uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; + } fp; + + void *relocs; + unsigned num_relocs; + + struct nouveau_resource *res; +}; + +/* first 2 bits are written into the program header, for each input */ +#define NVC0_INTERP_FLAT (1 << 0) +#define NVC0_INTERP_PERSPECTIVE (2 << 0) +#define NVC0_INTERP_LINEAR (3 << 0) +#define NVC0_INTERP_CENTROID (1 << 2) + +/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */ +struct nvc0_subroutine { + unsigned id; + unsigned first_insn; + uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; + uint32_t 
retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; +}; + +struct nvc0_translation_info { + struct nvc0_program *prog; + struct tgsi_full_instruction *insns; + unsigned num_insns; + ubyte input_file; + ubyte output_file; + ubyte fp_depth_output; + uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4]; + uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4]; + uint16_t sysval_loc[TGSI_SEMANTIC_COUNT]; + boolean sysval_in[TGSI_SEMANTIC_COUNT]; + int input_access[PIPE_MAX_SHADER_INPUTS][4]; + int output_access[PIPE_MAX_SHADER_OUTPUTS][4]; + ubyte interp_mode[PIPE_MAX_SHADER_INPUTS]; + boolean indirect_inputs; + boolean indirect_outputs; + boolean require_stores; + boolean global_stores; + uint32_t *immd32; + ubyte *immd32_ty; + unsigned immd32_nr; + unsigned temp128_nr; + ubyte edgeflag_out; + struct nvc0_subroutine *subr; + unsigned num_subrs; + boolean append_ucp; + struct tgsi_shader_info scan; +}; + +int nvc0_generate_code(struct nvc0_translation_info *); + +void nvc0_relocate_program(struct nvc0_program *, + uint32_t code_base, uint32_t data_base); + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c new file mode 100644 index 0000000000..2e9f4c1092 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -0,0 +1,354 @@ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" + +#include "nvc0_3d.xml.h" + +struct push_context { + struct nouveau_channel *chan; + + void *idxbuf; + + uint32_t vertex_words; + uint32_t packet_vertex_limit; + + struct translate *translate; + + boolean primitive_restart; + uint32_t prim; + uint32_t restart_index; + uint32_t instance_id; + + struct { + int buffer; + float value; + uint8_t *data; + unsigned offset; + unsigned stride; + } edgeflag; +}; + +static void +init_push_context(struct nvc0_context *nvc0, struct push_context *ctx) +{ + struct pipe_vertex_element *ve; 
+ + ctx->chan = nvc0->screen->base.channel; + ctx->translate = nvc0->vertex->translate; + + ctx->edgeflag.value = 0.5f; + + if (NVC0_USING_EDGEFLAG(nvc0)) { + ve = &nvc0->vertex->element[nvc0->vertprog->vp.edgeflag].pipe; + + ctx->edgeflag.buffer = ve->vertex_buffer_index; + ctx->edgeflag.offset = ve->src_offset; + + ctx->packet_vertex_limit = 1; + } else { + ctx->edgeflag.buffer = -1; + ctx->edgeflag.offset = 0; + ctx->edgeflag.stride = 0; + ctx->edgeflag.data = NULL; + + ctx->packet_vertex_limit = nvc0->vertex->vtx_per_packet_max; + } + + ctx->vertex_words = nvc0->vertex->vtx_size; +} + +static INLINE void +set_edgeflag(struct push_context *ctx, unsigned vtx_id) +{ + float f = *(float *)(ctx->edgeflag.data + vtx_id * ctx->edgeflag.stride); + + if (ctx->edgeflag.value != f) { + ctx->edgeflag.value = f; + IMMED_RING(ctx->chan, RING_3D(EDGEFLAG_ENABLE), f ? 1 : 0); + } +} + +static INLINE unsigned +prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static void +emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) +{ + uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i08(elts, push, ctx->restart_index); + + if (unlikely(ctx->edgeflag.buffer >= 0) && nr) + set_edgeflag(ctx, elts[0]); + + size = ctx->vertex_words * nr; + + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + + 
ctx->translate->run_elts8(ctx->translate, elts, nr, ctx->instance_id, + ctx->chan->cur); + + ctx->chan->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); + OUT_RING (ctx->chan, 0); + OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); + } + } +} + +static void +emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) +{ + uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i16(elts, push, ctx->restart_index); + + if (unlikely(ctx->edgeflag.buffer >= 0) && nr) + set_edgeflag(ctx, elts[0]); + + size = ctx->vertex_words * nr; + + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + + ctx->translate->run_elts16(ctx->translate, elts, nr, ctx->instance_id, + ctx->chan->cur); + + ctx->chan->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); + OUT_RING (ctx->chan, 0); + OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); + } + } +} + +static void +emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) +{ + uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i32(elts, push, ctx->restart_index); + + if (unlikely(ctx->edgeflag.buffer >= 0) && nr) + set_edgeflag(ctx, elts[0]); + + size = ctx->vertex_words * nr; + + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + + ctx->translate->run_elts(ctx->translate, elts, nr, ctx->instance_id, + ctx->chan->cur); + + ctx->chan->cur += size; + 
count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); + OUT_RING (ctx->chan, 0); + OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); + } + } +} + +static void +emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) +{ + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size = ctx->vertex_words * push; + + if (unlikely(ctx->edgeflag.buffer >= 0)) + set_edgeflag(ctx, start); + + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + + ctx->translate->run(ctx->translate, start, push, ctx->instance_id, + ctx->chan->cur); + ctx->chan->cur += size; + count -= push; + start += push; + } +} + + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } +} + +void +nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ + struct push_context ctx; + unsigned i, index_size; + unsigned inst = info->instance_count; + boolean apply_bias = info->indexed && info->index_bias; + + init_push_context(nvc0, &ctx); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + uint8_t *data; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; + struct nv04_resource *res = 
nv04_resource(vb->buffer); + + data = nouveau_resource_map_offset(&nvc0->base, res, + vb->buffer_offset, NOUVEAU_BO_RD); + + if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i)))) + data += info->index_bias * vb->stride; + + ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + + if (unlikely(i == ctx.edgeflag.buffer)) { + ctx.edgeflag.data = data + ctx.edgeflag.offset; + ctx.edgeflag.stride = vb->stride; + } + } + + if (info->indexed) { + ctx.idxbuf = + nouveau_resource_map_offset(&nvc0->base, + nv04_resource(nvc0->idxbuf.buffer), + nvc0->idxbuf.offset, NOUVEAU_BO_RD); + /* The index buffer could not be mapped: nothing can be drawn, but the + * vertex buffers mapped above still have to be unmapped. + */ + if (!ctx.idxbuf) + goto unmap_vtxbufs; + index_size = nvc0->idxbuf.index_size; + ctx.primitive_restart = info->primitive_restart; + ctx.restart_index = info->restart_index; + } else { + ctx.idxbuf = NULL; + index_size = 0; + ctx.primitive_restart = FALSE; + ctx.restart_index = 0; + } + + ctx.instance_id = info->start_instance; + ctx.prim = nvc0_prim_gl(info->mode); + + /* one VERTEX_BEGIN_GL / VERTEX_END_GL pair per instance */ + while (inst--) { + BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (ctx.chan, ctx.prim); + switch (index_size) { + case 0: + emit_vertices_seq(&ctx, info->start, info->count); + break; + case 1: + emit_vertices_i08(&ctx, info->start, info->count); + break; + case 2: + emit_vertices_i16(&ctx, info->start, info->count); + break; + case 4: + emit_vertices_i32(&ctx, info->start, info->count); + break; + default: + assert(0); + break; + } + IMMED_RING(ctx.chan, RING_3D(VERTEX_END_GL), 0); + + ctx.instance_id++; + ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + + if (unlikely(ctx.edgeflag.value == 0.0f)) + IMMED_RING(ctx.chan, RING_3D(EDGEFLAG_ENABLE), 1); + + if (info->indexed) + nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer)); + +unmap_vtxbufs: + for (i = 0; i < nvc0->num_vtxbufs; ++i) + nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer)); +} diff --git a/src/gallium/drivers/nvc0/nvc0_push2.c b/src/gallium/drivers/nvc0/nvc0_push2.c new file mode 100644 index 0000000000..6f51600558 --- 
/dev/null +++ b/src/gallium/drivers/nvc0/nvc0_push2.c @@ -0,0 +1,333 @@ + +#if 0 /* not used, kept for now to compare with util/translate */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" + +#include "nvc0_3d.xml.h" + +struct push_context { + struct nvc0_context *nvc0; + + uint vertex_size; + + void *idxbuf; + uint idxsize; + + float edgeflag; + int edgeflag_input; + + struct { + void *map; + void (*push)(struct nouveau_channel *, void *); + uint32_t stride; + uint32_t divisor; + uint32_t step; + } attr[32]; + int num_attrs; +}; + +static void +emit_b32_1(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b32_2(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); +} + +static void +emit_b32_3(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); + OUT_RING(chan, v[2]); +} + +static void +emit_b32_4(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); + OUT_RING(chan, v[2]); + OUT_RING(chan, v[3]); +} + +static void +emit_b16_1(struct nouveau_channel *chan, void *data) +{ + uint16_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b16_3(struct nouveau_channel *chan, void *data) +{ + uint16_t *v = data; + + OUT_RING(chan, (v[1] << 16) | v[0]); + OUT_RING(chan, v[2]); +} + +static void +emit_b08_1(struct nouveau_channel *chan, void *data) +{ + uint8_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b08_3(struct nouveau_channel *chan, void *data) +{ + uint8_t *v = data; + + OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); +} + +static void +emit_b64_1(struct nouveau_channel *chan, void *data) +{ + double *v = data; + + OUT_RINGf(chan, 
v[0]); +} + +static void +emit_b64_2(struct nouveau_channel *chan, void *data) +{ + double *v = data; + + OUT_RINGf(chan, v[0]); + OUT_RINGf(chan, v[1]); +} + +static void +emit_b64_3(struct nouveau_channel *chan, void *data) +{ + double *v = data; + + OUT_RINGf(chan, v[0]); + OUT_RINGf(chan, v[1]); + OUT_RINGf(chan, v[2]); +} + +static void +emit_b64_4(struct nouveau_channel *chan, void *data) +{ + double *v = data; + + OUT_RINGf(chan, v[0]); + OUT_RINGf(chan, v[1]); + OUT_RINGf(chan, v[2]); + OUT_RINGf(chan, v[3]); +} + +static INLINE void +emit_vertex(struct push_context *ctx, unsigned n) +{ + struct nouveau_channel *chan = ctx->nvc0->screen->base.channel; + int i; + + if (ctx->edgeflag_input < 32) { + /* TODO */ + } + + BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), ctx->vertex_size); + for (i = 0; i < ctx->num_attrs; ++i) + ctx->attr[i].push(chan, + (uint8_t *)ctx->attr[i].map + n * ctx->attr[i].stride); +} + +static void +emit_edgeflag(struct push_context *ctx, boolean enabled) +{ + struct nouveau_channel *chan = ctx->nvc0->screen->base.channel; + + IMMED_RING(chan, RING_3D(EDGEFLAG_ENABLE), enabled); +} + +static void +emit_elt08(struct push_context *ctx, unsigned start, unsigned count) +{ + uint8_t *idxbuf = ctx->idxbuf; + + while (count--) + emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_elt16(struct push_context *ctx, unsigned start, unsigned count) +{ + uint16_t *idxbuf = ctx->idxbuf; + + while (count--) + emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_elt32(struct push_context *ctx, unsigned start, unsigned count) +{ + uint32_t *idxbuf = ctx->idxbuf; + + while (count--) + emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_seq(struct push_context *ctx, unsigned start, unsigned count) +{ + while (count--) + emit_vertex(ctx, start++); +} + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + 
NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } +} + +void +nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ + struct push_context ctx; + unsigned i, n; + unsigned inst = info->instance_count; + unsigned prim = nvc0_prim_gl(info->mode); + + ctx.nvc0 = nvc0; + ctx.vertex_size = nvc0->vertex->vtx_size; + ctx.idxbuf = NULL; + ctx.num_attrs = 0; + ctx.edgeflag = 0.5f; + ctx.edgeflag_input = 32; + + for (i = 0; i < nvc0->vertex->num_elements; ++i) { + struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo *bo = nvc0_resource(vb->buffer)->bo; + unsigned nr_components; + + if (!(nvc0->vbo_fifo & (1 << i))) + continue; + n = ctx.num_attrs++; + + if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) + return; + ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset; + + nouveau_bo_unmap(bo); + + ctx.attr[n].stride = vb->stride; + ctx.attr[n].divisor = ve->instance_divisor; + + nr_components = util_format_get_nr_components(ve->src_format); + switch (util_format_get_component_bits(ve->src_format, + UTIL_FORMAT_COLORSPACE_RGB, 0)) { + case 8: + switch (nr_components) { + case 1: ctx.attr[n].push = emit_b08_1; break; + case 2: ctx.attr[n].push = emit_b16_1; break; + case 3: ctx.attr[n].push = emit_b08_3; break; + case 4: ctx.attr[n].push = emit_b32_1; break; + } + break; + case 16: + switch 
(nr_components) { + case 1: ctx.attr[n].push = emit_b16_1; break; + case 2: ctx.attr[n].push = emit_b32_1; break; + case 3: ctx.attr[n].push = emit_b16_3; break; + case 4: ctx.attr[n].push = emit_b32_2; break; + } + break; + case 32: + switch (nr_components) { + case 1: ctx.attr[n].push = emit_b32_1; break; + case 2: ctx.attr[n].push = emit_b32_2; break; + case 3: ctx.attr[n].push = emit_b32_3; break; + case 4: ctx.attr[n].push = emit_b32_4; break; + } + break; + default: + assert(0); + break; + } + } + + if (info->indexed) { + struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer); + if (!res || nouveau_bo_map(res->bo, NOUVEAU_BO_RD)) + return; + ctx.idxbuf = (uint8_t *)res->bo->map + nvc0->idxbuf.offset + res->offset; + nouveau_bo_unmap(res->bo); + ctx.idxsize = nvc0->idxbuf.index_size; + } else { + ctx.idxsize = 0; + } + + while (inst--) { + BEGIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (nvc0->screen->base.channel, prim); + switch (ctx.idxsize) { + case 0: + emit_seq(&ctx, info->start, info->count); + break; + case 1: + emit_elt08(&ctx, info->start, info->count); + break; + case 2: + emit_elt16(&ctx, info->start, info->count); + break; + case 4: + emit_elt32(&ctx, info->start, info->count); + break; + } + IMMED_RING(nvc0->screen->base.channel, RING_3D(VERTEX_END_GL), 0); + + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } +} + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c new file mode 100644 index 0000000000..ead015b6b8 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_query.c @@ -0,0 +1,341 @@ +/* + * Copyright 2011 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies 
of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Christoph Bumiller + */ + +#include "nvc0_context.h" +#include "nouveau/nv_object.xml.h" + +/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts + * (since we use only a single GPU channel per screen) will not work properly. + * + * The first is not that big of an issue because OpenGL does not allow nested + * queries anyway. 
+ */ + +struct nvc0_query { + uint32_t *data; + uint32_t type; + uint32_t sequence; + struct nouveau_bo *bo; + uint32_t base; + uint32_t offset; /* base + i * 16 */ + boolean ready; + boolean is64bit; + struct nouveau_mm_allocation *mm; +}; + +#define NVC0_QUERY_ALLOC_SPACE 128 + +static INLINE struct nvc0_query * +nvc0_query(struct pipe_query *pipe) +{ + return (struct nvc0_query *)pipe; +} + +static boolean +nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size) +{ + struct nvc0_screen *screen = nvc0->screen; + int ret; + + if (q->bo) { + nouveau_bo_ref(NULL, &q->bo); + if (q->mm) { + if (q->ready) + nouveau_mm_free(q->mm); + else + nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, q->mm); + } + } + if (size) { + q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base); + if (!q->bo) + return FALSE; + q->offset = q->base; + + ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD | + NOUVEAU_BO_NOSYNC); + if (ret) { + nvc0_query_allocate(nvc0, q, 0); + return FALSE; + } + q->data = q->bo->map; + nouveau_bo_unmap(q->bo); + } + return TRUE; +} + +static void +nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0); + FREE(nvc0_query(pq)); +} + +static struct pipe_query * +nvc0_query_create(struct pipe_context *pipe, unsigned type) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_query *q; + + q = CALLOC_STRUCT(nvc0_query); + if (!q) + return NULL; + + if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) { + FREE(q); + return NULL; + } + + q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || + type == PIPE_QUERY_PRIMITIVES_EMITTED || + type == PIPE_QUERY_SO_STATISTICS); + q->type = type; + + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + q->offset -= 16; + q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! 
*/ + } + + return (struct pipe_query *)q; +} + +static void +nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q, + unsigned offset, uint32_t get) +{ + offset += q->offset; + + MARK_RING (chan, 5, 2); + BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (chan, q->sequence); + OUT_RING (chan, get); +} + +static void +nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_query *q = nvc0_query(pq); + + /* For occlusion queries we have to change the storage, because a previous + * query might set the initial render condition to FALSE even *after* we re- + * initialized it to TRUE. + */ + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + q->offset += 16; + q->data += 16 / sizeof(*q->data); + if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE) + nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE); + + /* XXX: can we do this with the GPU, and sync with respect to a previous + * query ? + */ + q->data[1] = 1; /* initial render condition = TRUE */ + } + if (!q->is64bit) + q->data[0] = q->sequence++; /* the previously used one */ + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT); + IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? 
*/ + IMMED_RING(chan, RING_3D(COUNTER_RESET), + NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + IMMED_RING(chan, RING_3D(COUNTER_RESET), + NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES); + break; + case PIPE_QUERY_SO_STATISTICS: + BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2); + OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES); + OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_TIME_ELAPSED: + nvc0_query_get(chan, q, 0x10, 0x00005002); + break; + default: + break; + } + q->ready = FALSE; +} + +static void +nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_query *q = nvc0_query(pq); + + const int index = 0; /* for multiple vertex streams */ + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + nvc0_query_get(chan, q, 0, 0x0100f002); + BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); + OUT_RING (chan, 0); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5)); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5)); + break; + case PIPE_QUERY_SO_STATISTICS: + nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5)); + nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5)); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_TIME_ELAPSED: + nvc0_query_get(chan, q, 0, 0x00005002); + break; + case PIPE_QUERY_GPU_FINISHED: + nvc0_query_get(chan, q, 0, 0x1000f010); + break; + default: + assert(0); + break; + } +} + +static INLINE boolean +nvc0_query_ready(struct nvc0_query *q) +{ + return q->ready || (!q->is64bit && (q->data[0] == q->sequence)); +} + +static INLINE boolean +nvc0_query_wait(struct nvc0_query *q) +{ + int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD); + if (ret) + return FALSE; 
+ nouveau_bo_unmap(q->bo); + return TRUE; +} + +static boolean +nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, void *result) +{ + struct nvc0_query *q = nvc0_query(pq); + uint64_t *res64 = result; + uint32_t *res32 = result; + boolean *res8 = result; + uint64_t *data64 = (uint64_t *)q->data; + + if (q->type == PIPE_QUERY_GPU_FINISHED) { + res8[0] = nvc0_query_ready(q); + return TRUE; + } + + if (!q->ready) /* update ? */ + q->ready = nvc0_query_ready(q); + if (!q->ready) { + struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel; + if (!wait) { + if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */ + FIRE_RING(chan); + return FALSE; + } + if (!nvc0_query_wait(q)) + return FALSE; + } + q->ready = TRUE; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ + res32[0] = q->data[1]; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ + res64[0] = data64[0]; + break; + case PIPE_QUERY_SO_STATISTICS: + res64[0] = data64[0]; + res64[1] = data64[1]; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */ + res64[0] = 1000000000; + res8[8] = (data64[0] == data64[2]) ? 
FALSE : TRUE; + break; + case PIPE_QUERY_TIME_ELAPSED: + res64[0] = data64[1] - data64[3]; + break; + default: + return FALSE; + } + + return TRUE; +} + +static void +nvc0_render_condition(struct pipe_context *pipe, + struct pipe_query *pq, uint mode) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_query *q; + + if (!pq) { + IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + return; + } + q = nvc0_query(pq); + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + MARK_RING (chan, 5, 2); + BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RING (chan, q->sequence); + OUT_RING (chan, 0x00001001); + } + + MARK_RING (chan, 4, 2); + BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO); +} + +void +nvc0_init_query_functions(struct nvc0_context *nvc0) +{ + struct pipe_context *pipe = &nvc0->base.pipe; + + pipe->create_query = nvc0_query_create; + pipe->destroy_query = nvc0_query_destroy; + pipe->begin_query = nvc0_query_begin; + pipe->end_query = nvc0_query_end; + pipe->get_query_result = nvc0_query_result; + pipe->render_condition = nvc0_render_condition; +} diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c new file mode 100644 index 0000000000..44e66314e7 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_resource.c @@ -0,0 +1,51 @@ + +#include "pipe/p_context.h" +#include "nvc0_resource.h" +#include "nouveau/nouveau_screen.h" + + +static struct pipe_resource * +nvc0_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + switch 
(templ->target) { + case PIPE_BUFFER: + return nouveau_buffer_create(screen, templ); + default: + return nvc0_miptree_create(screen, templ); + } +} + +static struct pipe_resource * +nvc0_resource_from_handle(struct pipe_screen * screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + if (templ->target == PIPE_BUFFER) + return NULL; + else + return nvc0_miptree_from_handle(screen, templ, whandle); +} + +void +nvc0_init_resource_functions(struct pipe_context *pcontext) +{ + pcontext->get_transfer = u_get_transfer_vtbl; + pcontext->transfer_map = u_transfer_map_vtbl; + pcontext->transfer_flush_region = u_transfer_flush_region_vtbl; + pcontext->transfer_unmap = u_transfer_unmap_vtbl; + pcontext->transfer_destroy = u_transfer_destroy_vtbl; + pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; + pcontext->create_surface = nvc0_miptree_surface_new; + pcontext->surface_destroy = nvc0_miptree_surface_del; +} + +void +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen) +{ + pscreen->resource_create = nvc0_resource_create; + pscreen->resource_from_handle = nvc0_resource_from_handle; + pscreen->resource_get_handle = u_resource_get_handle_vtbl; + pscreen->resource_destroy = u_resource_destroy_vtbl; + pscreen->user_buffer_create = nouveau_user_buffer_create; +} diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h new file mode 100644 index 0000000000..f1c445b515 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -0,0 +1,75 @@ + +#ifndef __NVC0_RESOURCE_H__ +#define __NVC0_RESOURCE_H__ + +#include "util/u_transfer.h" +#include "util/u_double_list.h" +#define NOUVEAU_NVC0 +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_fence.h" +#include "nouveau/nouveau_buffer.h" +#undef NOUVEAU_NVC0 + +void +nvc0_init_resource_functions(struct pipe_context *pcontext); + +void +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen); + +#define 
NVC0_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf) + +#define NVC0_TILE_PITCH(m) (64 << NVC0_TILE_DIM_SHIFT(m, 0)) +#define NVC0_TILE_HEIGHT(m) ( 8 << NVC0_TILE_DIM_SHIFT(m, 1)) +#define NVC0_TILE_DEPTH(m) ( 1 << NVC0_TILE_DIM_SHIFT(m, 2)) + +#define NVC0_TILE_SIZE_2D(m) (((64 * 8) << \ + NVC0_TILE_DIM_SHIFT(m, 0)) << \ + NVC0_TILE_DIM_SHIFT(m, 1)) + +#define NVC0_TILE_SIZE(m) (NVC0_TILE_SIZE_2D(m) << NVC0_TILE_DIM_SHIFT(m, 2)) + +struct nvc0_miptree_level { + uint32_t offset; + uint32_t pitch; + uint32_t tile_mode; +}; + +#define NVC0_MAX_TEXTURE_LEVELS 16 + +struct nvc0_miptree { + struct nv04_resource base; + struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS]; + uint32_t total_size; + uint32_t layer_stride; + boolean layout_3d; /* TRUE if layer count varies with mip level */ +}; + +static INLINE struct nvc0_miptree * +nvc0_miptree(struct pipe_resource *pt) +{ + return (struct nvc0_miptree *)pt; +} + +/* Internal functions: + */ +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmp); + +struct pipe_resource * +nvc0_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *template, + struct winsys_handle *whandle); + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_context *, + struct pipe_resource *, + const struct pipe_surface *templ); + +void +nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *); + +uint32_t +nvc0_miptree_zslice_offset(struct nvc0_miptree *, unsigned l, unsigned z); + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c new file mode 100644 index 0000000000..1047ba3c33 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -0,0 +1,676 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "util/u_format_s3tc.h" +#include "pipe/p_screen.h" + +#include "nvc0_context.h" +#include "nvc0_screen.h" + +#include "nouveau/nv_object.xml.h" +#include "nvc0_graph_macros.h" + +static boolean +nvc0_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bindings) +{ + if (sample_count > 1) + return FALSE; + + if (!util_format_s3tc_enabled) { + switch (format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return FALSE; + default: + break; + } + } + + /* transfers & shared are always supported */ + bindings &= ~(PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_SHARED); + + return (nvc0_format_table[format].usage & bindings) == bindings; +} + +static int +nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 32; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 64; + case 
PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_ARRAY_TEXTURES: + return 1; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_GLSL: + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_TIMER_QUERY: + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_STREAM_OUTPUT: + return 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + return 1; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static int +nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum pipe_shader_cap param) +{ + switch (shader) { + case PIPE_SHADER_VERTEX: + /* + case PIPE_SHADER_TESSELLATION_CONTROL: + case PIPE_SHADER_TESSELLATION_EVALUATION: + */ + case PIPE_SHADER_GEOMETRY: + case PIPE_SHADER_FRAGMENT: + break; + default: + return 0; + } + + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + case 
PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 4; + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_VERTEX) + return 32; + return 0x300 / 16; + case PIPE_SHADER_CAP_MAX_CONSTS: + return 65536 / 16; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 14; + case PIPE_SHADER_CAP_MAX_ADDRS: + return 1; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + return shader != PIPE_SHADER_FRAGMENT; + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; + case PIPE_SHADER_CAP_MAX_TEMPS: + return NVC0_CAP_MAX_PROGRAM_TEMPS; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; /* please inline, or provide function declarations */ + default: + NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); + return 0; + } +} + +static float +nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0f; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0f; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0f; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0.0f; + } +} + +static void +nvc0_screen_destroy(struct pipe_screen *pscreen) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + + nouveau_fence_wait(screen->base.fence.current); + nouveau_fence_ref(NULL, &screen->base.fence.current); + + nouveau_bo_ref(NULL, &screen->text); + nouveau_bo_ref(NULL, &screen->tls); + nouveau_bo_ref(NULL, &screen->txc); + nouveau_bo_ref(NULL, &screen->fence.bo); + nouveau_bo_ref(NULL, &screen->vfetch_cache); + + nouveau_resource_destroy(&screen->text_heap); + + if (screen->tic.entries) + FREE(screen->tic.entries); + + nouveau_mm_destroy(screen->mm_VRAM_fe0); + + nouveau_grobj_free(&screen->fermi); 
+ nouveau_grobj_free(&screen->eng2d); + nouveau_grobj_free(&screen->m2mf); + + nouveau_screen_fini(&screen->base); + + FREE(screen); +} + +static int +nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, + unsigned size, const uint32_t *data) +{ + struct nouveau_channel *chan = screen->base.channel; + + size /= 4; + + BEGIN_RING(chan, RING_3D_(NVC0_GRAPH_MACRO_ID), 2); + OUT_RING (chan, (m - 0x3800) / 8); + OUT_RING (chan, pos); + BEGIN_RING_1I(chan, RING_3D_(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); + OUT_RING (chan, pos); + OUT_RINGp (chan, data, size); + + return pos + size; +} + +static void +nvc0_magic_3d_init(struct nouveau_channel *chan) +{ + BEGIN_RING(chan, RING_3D_(0x10cc), 1); + OUT_RING (chan, 0xff); + BEGIN_RING(chan, RING_3D_(0x10e0), 2); + OUT_RING(chan, 0xff); + OUT_RING(chan, 0xff); + BEGIN_RING(chan, RING_3D_(0x10ec), 2); + OUT_RING(chan, 0xff); + OUT_RING(chan, 0xff); + BEGIN_RING(chan, RING_3D_(0x074c), 1); + OUT_RING (chan, 0x3f); + + BEGIN_RING(chan, RING_3D_(0x16a8), 1); + OUT_RING (chan, (3 << 16) | 3); + BEGIN_RING(chan, RING_3D_(0x1794), 1); + OUT_RING (chan, (2 << 16) | 2); + BEGIN_RING(chan, RING_3D_(0x0de8), 1); + OUT_RING (chan, 1); + +#if 0 /* software method */ + BEGIN_RING(chan, RING_3D_(0x1528), 1); /* MP poke */ + OUT_RING (chan, 0); +#endif + + BEGIN_RING(chan, RING_3D_(0x12ac), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x0218), 1); + OUT_RING (chan, 0x10); + BEGIN_RING(chan, RING_3D_(0x10fc), 1); + OUT_RING (chan, 0x10); + BEGIN_RING(chan, RING_3D_(0x1290), 1); + OUT_RING (chan, 0x10); + BEGIN_RING(chan, RING_3D_(0x12d8), 2); + OUT_RING (chan, 0x10); + OUT_RING (chan, 0x10); + BEGIN_RING(chan, RING_3D_(0x06d4), 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, RING_3D_(0x1140), 1); + OUT_RING (chan, 0x10); + BEGIN_RING(chan, RING_3D_(0x1610), 1); + OUT_RING (chan, 0xe); + + BEGIN_RING(chan, RING_3D_(0x164c), 1); + OUT_RING (chan, 1 << 12); + BEGIN_RING(chan, RING_3D_(0x151c), 1); + OUT_RING (chan, 1); 
+ BEGIN_RING(chan, RING_3D_(0x020c), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D_(0x030c), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x0300), 1); + OUT_RING (chan, 3); +#if 0 /* software method */ + BEGIN_RING(chan, RING_3D_(0x1280), 1); /* PGRAPH poke */ + OUT_RING (chan, 0); +#endif + BEGIN_RING(chan, RING_3D_(0x02d0), 1); + OUT_RING (chan, 0x1f40); + BEGIN_RING(chan, RING_3D_(0x00fdc), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D_(0x19c0), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D_(0x075c), 1); + OUT_RING (chan, 3); + + BEGIN_RING(chan, RING_3D_(0x0fac), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x0f90), 1); + OUT_RING (chan, 0); +} + +static void +nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 sequence) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + struct nouveau_channel *chan = screen->base.channel; + + MARK_RING (chan, 5, 2); + BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); + OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); + OUT_RING (chan, sequence); + OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT | + (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT)); +} + +static u32 +nvc0_screen_fence_update(struct pipe_screen *pscreen) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + return screen->fence.map[0]; +} + +#define FAIL_SCREEN_INIT(str, err) \ + do { \ + NOUVEAU_ERR(str, err); \ + nvc0_screen_destroy(pscreen); \ + return NULL; \ + } while(0) + +struct pipe_screen * +nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) +{ + struct nvc0_screen *screen; + struct nouveau_channel *chan; + struct pipe_screen *pscreen; + int ret; + unsigned i; + + screen = CALLOC_STRUCT(nvc0_screen); + if (!screen) + return NULL; + pscreen = &screen->base.base; + + screen->base.sysmem_bindings = PIPE_BIND_CONSTANT_BUFFER; + + ret = nouveau_screen_init(&screen->base, dev); + if (ret) { + 
nvc0_screen_destroy(pscreen); + return NULL; + } + chan = screen->base.channel; + + pscreen->winsys = ws; + pscreen->destroy = nvc0_screen_destroy; + pscreen->context_create = nvc0_create; + pscreen->is_format_supported = nvc0_screen_is_format_supported; + pscreen->get_param = nvc0_screen_get_param; + pscreen->get_shader_param = nvc0_screen_get_shader_param; + pscreen->get_paramf = nvc0_screen_get_paramf; + + nvc0_screen_init_resource_functions(pscreen); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, + &screen->fence.bo); + if (ret) + goto fail; + nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR); + screen->fence.map = screen->fence.bo->map; + nouveau_bo_unmap(screen->fence.bo); + screen->base.fence.emit = nvc0_screen_fence_emit; + screen->base.fence.update = nvc0_screen_fence_update; + + for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) { + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE, + &screen->scratch.bo[i]); + if (ret) + goto fail; + } + + ret = nouveau_grobj_alloc(chan, 0xbeef9039, NVC0_M2MF, &screen->m2mf); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); + + BIND_RING (chan, screen->m2mf, NVC0_SUBCH_MF); + BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); + OUT_RING (chan, 0); + + ret = nouveau_grobj_alloc(chan, 0xbeef902d, NVC0_2D, &screen->eng2d); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret); + + BIND_RING (chan, screen->eng2d, NVC0_SUBCH_2D); + BEGIN_RING(chan, RING_2D(OPERATION), 1); + OUT_RING (chan, NVC0_2D_OPERATION_SRCCOPY); + BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_2D(COLOR_KEY_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_2D_(0x0884), 1); + OUT_RING (chan, 0x3f); + BEGIN_RING(chan, RING_2D_(0x0888), 1); + OUT_RING (chan, 
1); + + ret = nouveau_grobj_alloc(chan, 0xbeef9097, NVC0_3D, &screen->fermi); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); + + BIND_RING (chan, screen->fermi, NVC0_SUBCH_3D); + BEGIN_RING(chan, RING_3D(NOTIFY_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); + OUT_RING (chan, 0); + + BEGIN_RING(chan, RING_3D(COND_MODE), 1); + OUT_RING (chan, NVC0_3D_COND_MODE_ALWAYS); + + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); + OUT_RING (chan, 1); + + BEGIN_RING(chan, RING_3D(CSAA_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1); + OUT_RING (chan, NVC0_3D_MULTISAMPLE_MODE_1X); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1); + OUT_RING (chan, 0); + + nvc0_magic_3d_init(chan); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, &screen->text); + if (ret) + goto fail; + + nouveau_resource_init(&screen->text_heap, 0, 1 << 20); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, + &screen->uniforms); + if (ret) + goto fail; + + /* auxiliary constants (6 user clip planes, base instance id) */ + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + OUT_RING (chan, 256); + OUT_RELOCh(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + for (i = 0; i < 5; ++i) { + BEGIN_RING(chan, RING_3D(CB_BIND(i)), 1); + OUT_RING (chan, (15 << 4) | 1); + } + + screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16); + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, + screen->tls_size, &screen->tls); + if (ret) + goto fail; + + BEGIN_RING(chan, RING_3D(CODE_ADDRESS_HIGH), 2); + OUT_RELOCh(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); 
+ BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, screen->tls_size >> 32); + OUT_RING (chan, screen->tls_size); + BEGIN_RING(chan, RING_3D_(0x07a0), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1); + OUT_RING (chan, 0); + + for (i = 0; i < 5; ++i) { + BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1); + OUT_RING (chan, 0x54); + } + BEGIN_RING(chan, RING_3D(LINKED_TSC), 1); + OUT_RING (chan, 0); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, + &screen->vfetch_cache); + if (ret) + goto fail; + + BEGIN_RING(chan, RING_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->vfetch_cache, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->vfetch_cache, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, 3); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, &screen->txc); + if (ret) + goto fail; + + BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, NVC0_TIC_MAX_ENTRIES - 1); + + BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, NVC0_TSC_MAX_ENTRIES - 1); + + BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */ + OUT_RING (chan, 0x3f); + + BEGIN_RING(chan, RING_3D(CLIP_RECTS_MODE), 1); + OUT_RING (chan, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 8 * 2); + for (i = 0; i < 8 * 2; ++i) + OUT_RING(chan, 0); + 
BEGIN_RING(chan, RING_3D(CLIP_RECTS_EN), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1); + OUT_RING (chan, 0); + + /* neither scissors, viewport nor stencil mask should affect clears */ + BEGIN_RING(chan, RING_3D(CLEAR_FLAGS), 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 1.0f); + BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); + OUT_RING (chan, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1); + + /* We use scissors instead of exact view volume clipping, + * so they're always enabled. + */ + BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3); + OUT_RING (chan, 1); + OUT_RING (chan, 8192 << 16); + OUT_RING (chan, 8192 << 16); + + BEGIN_RING(chan, RING_3D_(0x0fac), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x3484), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x0dbc), 1); + OUT_RING (chan, 0x00010000); + BEGIN_RING(chan, RING_3D_(0x0dd8), 1); + OUT_RING (chan, 0xff800006); + BEGIN_RING(chan, RING_3D_(0x3488), 1); + OUT_RING (chan, 0); + +#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); + + i = 0; + MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables); + MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select); + MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select); + MK_MACRO(NVC0_3D_GP_SELECT, nvc0_9097_gp_select); + MK_MACRO(NVC0_3D_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front); + MK_MACRO(NVC0_3D_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back); + MK_MACRO(NVC0_3D_COLOR_MASK_BROADCAST, nvc0_9097_color_mask_brdc); + + BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(GP_SELECT), 1); + OUT_RING (chan, 0x40); + BEGIN_RING(chan, RING_3D(LAYER), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); + OUT_RING (chan, 0x30); + BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1); + 
OUT_RING (chan, 3); + BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1); + OUT_RING (chan, 0x20); + BEGIN_RING(chan, RING_3D(SP_SELECT(0)), 1); + OUT_RING (chan, 0x00); + + BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(POINT_RASTER_RULES), 1); + OUT_RING (chan, NVC0_3D_POINT_RASTER_RULES_OGL); + + BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1); + OUT_RING (chan, 0x11111111); + BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1); + OUT_RING (chan, 1); + + BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); + OUT_RING (chan, 0xab); + OUT_RING (chan, 0x00000000); + + FIRE_RING (chan); + + screen->tic.entries = CALLOC(4096, sizeof(void *)); + screen->tsc.entries = screen->tic.entries + 2048; + + screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); + + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); + + return pscreen; + +fail: + nvc0_screen_destroy(pscreen); + return NULL; +} + +void +nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) +{ + struct nouveau_channel *chan = screen->base.channel; + + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + + MARK_RING(chan, 5, 5); + nouveau_bo_validate(chan, screen->text, flags); + nouveau_bo_validate(chan, screen->uniforms, flags); + nouveau_bo_validate(chan, screen->txc, flags); + nouveau_bo_validate(chan, screen->vfetch_cache, flags); + + if (screen->cur_ctx && screen->cur_ctx->state.tls_required) + nouveau_bo_validate(chan, screen->tls, flags); +} + +int +nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry) +{ + int i = screen->tic.next; + + while (screen->tic.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); + + screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); + + if (screen->tic.entries[i]) + nv50_tic_entry(screen->tic.entries[i])->id = -1; + + screen->tic.entries[i] = entry; + return i; +} + +int +nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry) +{ + int i = 
/* Per-device driver state for nvc0.
 *
 * 'base' has to stay the first member: nvc0_screen() converts a
 * pipe_screen pointer to this struct with a plain cast.
 */
struct nvc0_screen {
   struct nouveau_screen base;
   struct nouveau_winsys *nvws;

   struct nvc0_context *cur_ctx;

   struct nouveau_bo *text;         /* shader code, bound as CODE_ADDRESS */
   struct nouveau_bo *uniforms;     /* constant buffers; auxiliary constants
                                     * are bound at offset 5 << 16 */
   struct nouveau_bo *tls;          /* bound as LOCAL_ADDRESS */
   struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
   struct nouveau_bo *vfetch_cache; /* bound as VERTEX_QUARANTINE_ADDRESS */

   uint64_t tls_size;               /* size in bytes of 'tls' */

   struct nouveau_resource *text_heap; /* sub-allocator for 'text' */

   struct {
      struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS];
      uint8_t *buf;
      int index;
      uint32_t offset;
   } scratch;

   /* Texture image control slots.  'entries' is indexed by slot id; a set
    * bit in 'lock' makes nvc0_screen_tic_alloc() skip that slot; 'next' is
    * where the next search starts.
    */
   struct {
      void **entries;
      int next;
      uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
   } tic;

   /* Texture sampler control slots; same scheme as 'tic'.  'entries' points
    * into the allocation owned by tic.entries.
    */
   struct {
      void **entries;
      int next;
      uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
   } tsc;

   struct {
      struct nouveau_bo *bo;
      uint32_t *map;                /* map[0] is read as the current fence
                                     * sequence number */
   } fence;

   struct nouveau_mman *mm_VRAM_fe0;

   struct nouveau_grobj *fermi;     /* NVC0_3D object */
   struct nouveau_grobj *eng2d;     /* NVC0_2D object */
   struct nouveau_grobj *m2mf;      /* NVC0_M2MF object */
};
+int nvc0_screen_tic_alloc(struct nvc0_screen *, void *); +int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); + +static INLINE void +nvc0_resource_fence(struct nv04_resource *res, uint32_t flags) +{ + struct nvc0_screen *screen = nvc0_screen(res->base.screen); + + if (res->mm) { + nouveau_fence_ref(screen->base.fence.current, &res->fence); + + if (flags & NOUVEAU_BO_WR) + nouveau_fence_ref(screen->base.fence.current, &res->fence_wr); + } +} + +static INLINE void +nvc0_resource_validate(struct nv04_resource *res, uint32_t flags) +{ + struct nvc0_screen *screen = nvc0_screen(res->base.screen); + + if (likely(res->bo)) { + nouveau_bo_validate(screen->base.channel, res->bo, flags); + + if (flags & NOUVEAU_BO_WR) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + if (flags & NOUVEAU_BO_RD) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nvc0_resource_fence(res, flags); + } +} + +struct nvc0_format { + uint32_t rt; + uint32_t tic; + uint32_t vtx; + uint32_t usage; +}; + +extern const struct nvc0_format nvc0_format_table[]; + +static INLINE void +nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); +} + +static INLINE void +nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); +} + +static INLINE void +nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) { + screen->tic.entries[tic->id] = NULL; + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); + } +} + +static INLINE void +nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) { + screen->tsc.entries[tsc->id] = NULL; + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); + } +} + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c 
b/src/gallium/drivers/nvc0/nvc0_shader_state.c new file mode 100644 index 0000000000..7294eaa222 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -0,0 +1,249 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +#include "nvc0_context.h" + +static INLINE void +nvc0_program_update_context_state(struct nvc0_context *nvc0, + struct nvc0_program *prog, int stage) +{ + if (prog->hdr[1]) + nvc0->state.tls_required |= 1 << stage; + else + nvc0->state.tls_required &= ~(1 << stage); +} + +static boolean +nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + int ret; + unsigned size; + + if (prog->translated) + return TRUE; + + prog->translated = nvc0_program_translate(prog); + if (!prog->translated) + return FALSE; + + size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100); + + ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog, + &prog->res); + if (ret) + return FALSE; + + prog->code_base = prog->res->start; + + nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->text, prog->code_base, + NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); + nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->text, + prog->code_base + NVC0_SHADER_HEADER_SIZE, + NOUVEAU_BO_VRAM, prog->code_size, prog->code); + + BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1); + OUT_RING (nvc0->screen->base.channel, 0x1111); + + return TRUE; +} + +void +nvc0_vertprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *vp = nvc0->vertprog; + + if (nvc0->clip.nr > vp->vp.num_ucps) { + assert(nvc0->clip.nr <= 6); + vp->vp.num_ucps = 6; + + if (vp->translated) + nvc0_program_destroy(nvc0, vp); + } + + if (!nvc0_program_validate(nvc0, vp)) + return; + nvc0_program_update_context_state(nvc0, vp, 0); + + BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2); + OUT_RING (chan, 0x11); + OUT_RING (chan, vp->code_base); + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1); + OUT_RING (chan, vp->max_gpr); + + // BEGIN_RING(chan, RING_3D_(0x163c), 1); + // OUT_RING (chan, 0); + 
BEGIN_RING(chan, RING_3D(VERT_COLOR_CLAMP_EN), 1); + OUT_RING (chan, 1); +} + +void +nvc0_fragprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *fp = nvc0->fragprog; + + if (!nvc0_program_validate(nvc0, fp)) + return; + nvc0_program_update_context_state(nvc0, fp, 4); + + BEGIN_RING(chan, RING_3D(SP_SELECT(5)), 2); + OUT_RING (chan, 0x51); + OUT_RING (chan, fp->code_base); + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(5)), 1); + OUT_RING (chan, fp->max_gpr); + + BEGIN_RING(chan, RING_3D_(0x0360), 2); + OUT_RING (chan, 0x20164010); + OUT_RING (chan, 0x20); + BEGIN_RING(chan, RING_3D_(0x196c), 1); + OUT_RING (chan, fp->flags[0]); +} + +void +nvc0_tctlprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *tp = nvc0->tctlprog; + + if (!tp) { + BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1); + OUT_RING (chan, 0x20); + return; + } + if (!nvc0_program_validate(nvc0, tp)) + return; + nvc0_program_update_context_state(nvc0, tp, 1); + + BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2); + OUT_RING (chan, 0x21); + OUT_RING (chan, tp->code_base); + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(2)), 1); + OUT_RING (chan, tp->max_gpr); +} + +void +nvc0_tevlprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *tp = nvc0->tevlprog; + + if (!tp) { + BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); + OUT_RING (chan, 0x30); + return; + } + if (!nvc0_program_validate(nvc0, tp)) + return; + nvc0_program_update_context_state(nvc0, tp, 2); + + BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); + OUT_RING (chan, 0x31); + BEGIN_RING(chan, RING_3D(SP_START_ID(3)), 1); + OUT_RING (chan, tp->code_base); + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(3)), 1); + OUT_RING (chan, tp->max_gpr); +} + +void +nvc0_gmtyprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + 
struct nvc0_program *gp = nvc0->gmtyprog; + + if (!gp) { + BEGIN_RING(chan, RING_3D(GP_SELECT), 1); + OUT_RING (chan, 0x40); + return; + } + if (!nvc0_program_validate(nvc0, gp)) + return; + nvc0_program_update_context_state(nvc0, gp, 3); + + BEGIN_RING(chan, RING_3D(GP_SELECT), 1); + OUT_RING (chan, 0x41); + BEGIN_RING(chan, RING_3D(SP_START_ID(4)), 1); + OUT_RING (chan, gp->code_base); + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1); + OUT_RING (chan, gp->max_gpr); +} + +/* Transform feedback *is* kind of shader related: we need to inspect the + * program to get the output locations right. Emits TFB_ENABLE, then per bound buffer its address, remaining size and (only when NVC0_NEW_TFB is dirty) its varying layout. + */ +void +nvc0_tfb_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *vp; + struct nvc0_transform_feedback_state *tfb = nvc0->tfb; + int b; + + BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); + if (!tfb) { + OUT_RING(chan, 0); + return; + } + OUT_RING(chan, 1); + /* NOTE(review): the vertex program is preferred even when a geometry program is bound - confirm which stage's outputs are meant to be captured */ + vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog; + + for (b = 0; b < nvc0->num_tfbbufs; ++b) { + uint8_t idx, var[128]; + int i, n; + struct nv04_resource *buf = nv04_resource(nvc0->tfbbuf[b]); + + BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5); + OUT_RING (chan, 1); + OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); + OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); + OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]); + OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ? */ + /* the varying layout below is only re-emitted when the tfb state object changed */ + if (!(nvc0->dirty & NVC0_NEW_TFB)) + continue; + + BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3); + OUT_RING (chan, 0); + OUT_RING (chan, tfb->varying_count[b]); + OUT_RING (chan, tfb->stride[b]); + + n = b ? 
tfb->varying_count[b - 1] : 0; + /* varying_index is filled buffer after buffer by nvc0_tfb_state_create, so the entries of buffer b start after the varyings of ALL earlier buffers, not only those of buffer b - 1: add the counts of buffers 0 .. b - 2 as well */ + for (i = 0; i < b - 1; ++i) + n += tfb->varying_count[i]; + i = 0; + for (; i < tfb->varying_count[b]; ++i) { + idx = tfb->varying_index[n + i]; + var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3); + } + /* pad with zeros to a multiple of 4 entries, since i / 4 words are emitted */ + for (; i & 3; ++i) + var[i] = 0; + + BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4); + OUT_RINGp (chan, var, i / 4); + } + for (; b < 4; ++b) + IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0); +} diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c new file mode 100644 index 0000000000..ab68abcfb5 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -0,0 +1,860 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" + +#include "tgsi/tgsi_parse.h" + +#include "nvc0_stateobj.h" +#include "nvc0_context.h" + +#include "nvc0_3d.xml.h" +#include "nv50/nv50_texture.xml.h" + +#include "nouveau/nouveau_gldefs.h" + /* Expand a PIPE_MASK_* channel mask to one hex digit per channel: R = 0x0001, G = 0x0010, B = 0x0100, A = 0x1000. */ +static INLINE uint32_t +nvc0_colormask(unsigned mask) +{ + uint32_t ret = 0; + + if (mask & PIPE_MASK_R) + ret |= 0x0001; + if (mask & PIPE_MASK_G) + ret |= 0x0010; + if (mask & PIPE_MASK_B) + ret |= 0x0100; + if (mask & PIPE_MASK_A) + ret |= 0x1000; + + return ret; +} + /* One switch case returning NV50_3D_BLEND_FACTOR_<b> for PIPE_BLENDFACTOR_<a>. */ +#define NVC0_BLEND_FACTOR_CASE(a, b) \ + case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b + /* Translate a PIPE_BLENDFACTOR_* value to its NV50_3D_BLEND_FACTOR_* counterpart; values not listed fall back to NV50_3D_BLEND_FACTOR_ZERO. */ +static INLINE uint32_t +nvc0_blend_fac(unsigned factor) +{ + switch (factor) { + NVC0_BLEND_FACTOR_CASE(ONE, ONE); + NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE); + NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA); + NVC0_BLEND_FACTOR_CASE(ZERO, ZERO); + NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA); + default: + return NV50_3D_BLEND_FACTOR_ZERO; + } +} + /* Create a blend state object: keeps a copy of the gallium state and records the corresponding commands with the SB_* macros. */ +static void * +nvc0_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nvc0_blend_stateobj *so = 
CALLOC_STRUCT(nvc0_blend_stateobj); + int i; + + so->pipe = *cso; + + SB_IMMED_3D(so, BLEND_INDEPENDENT, cso->independent_blend_enable); + + if (!cso->independent_blend_enable) { + SB_BEGIN_3D(so, BLEND_ENABLES, 1); + SB_DATA (so, cso->rt[0].blend_enable ? 0xff : 0); + + if (cso->rt[0].blend_enable) { + SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5); + SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_src_factor)); + SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1); + SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_dst_factor)); + } + + SB_BEGIN_3D(so, COLOR_MASK_BROADCAST, 1); + SB_DATA (so, nvc0_colormask(cso->rt[0].colormask)); + } else { + uint8_t en = 0; + + for (i = 0; i < 8; ++i) { + if (!cso->rt[i].blend_enable) + continue; + en |= 1 << i; + + SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor)); + } + SB_BEGIN_3D(so, BLEND_ENABLES, 1); + SB_DATA (so, en); + + SB_BEGIN_3D(so, COLOR_MASK(0), 8); + for (i = 0; i < 8; ++i) + SB_DATA(so, nvc0_colormask(cso->rt[i].colormask)); + } + + if (cso->logicop_enable) { + SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2); + SB_DATA (so, 1); + SB_DATA (so, nvgl_logicop_func(cso->logicop_func)); + } else { + SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0); + } + + assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + return so; +} + +static void +nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->blend = hwcso; + nvc0->dirty |= 
NVC0_NEW_BLEND; +} + /* Free a blend state object. */ +static void +nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + /* Create a rasterizer state object: keeps a copy of the gallium state and records the corresponding commands with the SB_* macros. Returns NULL if the allocation fails. */ +static void * +nvc0_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nvc0_rasterizer_stateobj *so; + uint32_t reg; + + so = CALLOC_STRUCT(nvc0_rasterizer_stateobj); + if (!so) + return NULL; + so->pipe = *cso; + + /* Scissor enables are handled in scissor state, we will not want to + * always emit 16 commands, one for each scissor rectangle, here. + */ + + SB_BEGIN_3D(so, SHADE_MODEL, 1); + SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT : + NVC0_3D_SHADE_MODEL_SMOOTH); + SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first); + SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside); + + SB_BEGIN_3D(so, LINE_WIDTH, 1); + SB_DATA (so, fui(cso->line_width)); + SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth); + /* the stipple pattern word is only recorded when line stippling is enabled */ + SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1); + if (cso->line_stipple_enable) { + SB_DATA (so, 1); + SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1); + SB_DATA (so, (cso->line_stipple_pattern << 8) | + cso->line_stipple_factor); + + } else { + SB_DATA (so, 0); + } + /* a fixed POINT_SIZE is only recorded when the size is not taken per vertex */ + SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex); + if (!cso->point_size_per_vertex) { + SB_BEGIN_3D(so, POINT_SIZE, 1); + SB_DATA (so, fui(cso->point_size)); + } + + reg = (cso->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) ? 
+ NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT : + NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT; + /* low 8 bits of sprite_coord_enable are shifted up by 3 and combined with the origin flag */ + SB_BEGIN_3D(so, POINT_COORD_REPLACE, 1); + SB_DATA (so, ((cso->sprite_coord_enable & 0xff) << 3) | reg); + SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization); + SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth); + + SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 1); + SB_DATA (so, nvgl_polygon_mode(cso->fill_front)); + SB_BEGIN_3D(so, POLYGON_MODE_BACK, 1); + SB_DATA (so, nvgl_polygon_mode(cso->fill_back)); + SB_IMMED_3D(so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth); + /* three consecutive words: cull enable, front face winding, culled face (BACK is also the fallback for unknown values) */ + SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); + SB_DATA (so, cso->cull_face != PIPE_FACE_NONE); + SB_DATA (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW : + NVC0_3D_FRONT_FACE_CW); + switch (cso->cull_face) { + case PIPE_FACE_FRONT_AND_BACK: + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK); + break; + case PIPE_FACE_FRONT: + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT); + break; + case PIPE_FACE_BACK: + default: + SB_DATA(so, NVC0_3D_CULL_FACE_BACK); + break; + } + + SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable); + SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3); + SB_DATA (so, cso->offset_point); + SB_DATA (so, cso->offset_line); + SB_DATA (so, cso->offset_tri); + /* offset factor and units are only recorded when some offset mode is enabled; note the units are doubled */ + if (cso->offset_point || cso->offset_line || cso->offset_tri) { + SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); + SB_DATA (so, fui(cso->offset_scale)); + SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); + SB_DATA (so, fui(cso->offset_units * 2.0f)); + } + /* the recorded command words must fit into so->state */ + assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + /* Make hwcso the current rasterizer state and flag it dirty. */ +static void +nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->rast = hwcso; + nvc0->dirty |= NVC0_NEW_RASTERIZER; +} + /* Free a rasterizer state object. */ +static void +nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + /* Create a depth/stencil/alpha state object from the gallium state. */ +static void * +nvc0_zsa_state_create(struct pipe_context *pipe, + const 
struct pipe_depth_stencil_alpha_state *cso) +{ + struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj); + + /* the allocation can fail; return NULL like nvc0_rasterizer_state_create does instead of writing through a NULL pointer */ + if (!so) + return NULL; + so->pipe = *cso; + + SB_IMMED_3D(so, DEPTH_TEST_ENABLE, cso->depth.enabled); + if (cso->depth.enabled) { + SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask); + SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1); + SB_DATA (so, nvgl_comparison_op(cso->depth.func)); + } + /* front stencil: enable, the three ops and the function, then value mask and write mask */ + if (cso->stencil[0].enabled) { + SB_BEGIN_3D(so, STENCIL_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); + SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2); + SB_DATA (so, cso->stencil[0].valuemask); + SB_DATA (so, cso->stencil[0].writemask); + } else { + SB_IMMED_3D(so, STENCIL_ENABLE, 0); + } + /* back stencil requires the front stencil to be enabled; here the write mask is recorded before the value mask */ + if (cso->stencil[1].enabled) { + assert(cso->stencil[0].enabled); + SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func)); + SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); + SB_DATA (so, cso->stencil[1].writemask); + SB_DATA (so, cso->stencil[1].valuemask); + } else + if (cso->stencil[0].enabled) { + SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0); + } + + SB_IMMED_3D(so, ALPHA_TEST_ENABLE, cso->alpha.enabled); + if (cso->alpha.enabled) { + SB_BEGIN_3D(so, ALPHA_TEST_REF, 2); + SB_DATA (so, fui(cso->alpha.ref_value)); + SB_DATA (so, nvgl_comparison_op(cso->alpha.func)); + } + /* the recorded command words must fit into so->state */ + assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + /* Make hwcso the current depth/stencil/alpha state and flag it dirty. */ +static void +nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->zsa = hwcso; + nvc0->dirty |= NVC0_NEW_ZSA; +} + +static void 
+nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +/* ====================== SAMPLERS AND TEXTURES ================================ + */ + /* One switch case returning NV50_TSC_WRAP_<n> for PIPE_TEX_WRAP_<n>. */ +#define NV50_TSC_WRAP_CASE(n) \ + case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n + /* Translate a PIPE_TEX_WRAP_* mode; unknown modes are reported and treated as REPEAT. */ +static INLINE unsigned +nv50_tsc_wrap_mode(unsigned wrap) +{ + switch (wrap) { + NV50_TSC_WRAP_CASE(REPEAT); + NV50_TSC_WRAP_CASE(MIRROR_REPEAT); + NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(CLAMP); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP); + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + return NV50_TSC_WRAP_REPEAT; + } +} + /* Delete a sampler state: clears every bound slot (below num_samplers, in all 5 stages) that still points at it, releases its TSC entry and frees it. */ +static void +nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + unsigned s, i; + + for (s = 0; s < 5; ++s) + for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i) + if (nvc0_context(pipe)->samplers[s][i] == hwcso) + nvc0_context(pipe)->samplers[s][i] = NULL; + + nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nv50_tsc_entry(hwcso)); + + FREE(hwcso); +} + /* Bind nr sampler states to stage s: every replaced entry is unlocked, slots at or above nr are unlocked and cleared, and NVC0_NEW_SAMPLERS is flagged. */ +static INLINE void +nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s, + unsigned nr, void **hwcso) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nv50_tsc_entry *old = nvc0->samplers[s][i]; + + nvc0->samplers[s][i] = nv50_tsc_entry(hwcso[i]); + if (old) + nvc0_screen_tsc_unlock(nvc0->screen, old); + } + for (; i < nvc0->num_samplers[s]; ++i) { + if (nvc0->samplers[s][i]) { + nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); + /* drop the pointer: nvc0_sampler_state_delete only scans slots below num_samplers, so a stale entry left here could be freed and then unlocked again as `old` by a later, larger bind */ + nvc0->samplers[s][i] = NULL; + } + } + + nvc0->num_samplers[s] = nr; + + nvc0->dirty |= NVC0_NEW_SAMPLERS; +} + /* Vertex stage wrapper (stage index 0). */ +static void +nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s); +} + /* Fragment stage wrapper (stage index 4). */ +static void +nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s); +} + +static void 
+nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s); +} + +/* NOTE: only called when not referenced anywhere, won't be bound. Drops the view's texture reference, releases its TIC entry and frees it. */ +static void +nvc0_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + + nvc0_screen_tic_free(nvc0_context(pipe)->screen, nv50_tic_entry(view)); + + FREE(nv50_tic_entry(view)); +} + /* Bind nr sampler views to stage s: previously bound entries are unlocked, slots at or above nr are released, the texture buffer context is reset and NVC0_NEW_TEXTURES is flagged. */ +static INLINE void +nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, + unsigned nr, + struct pipe_sampler_view **views) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); + if (old) + nvc0_screen_tic_unlock(nvc0->screen, old); + + pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]); + } + /* unbind whatever was bound beyond the new count */ + for (i = nr; i < nvc0->num_textures[s]; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); + if (!old) + continue; + nvc0_screen_tic_unlock(nvc0->screen, old); + + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); + } + + nvc0->num_textures[s] = nr; + + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES); + + nvc0->dirty |= NVC0_NEW_TEXTURES; +} + /* Vertex stage wrapper (stage index 0). */ +static void +nvc0_vp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views); +} + /* Fragment stage wrapper (stage index 4). */ +static void +nvc0_fp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views); +} + /* Geometry stage wrapper (stage index 3). */ +static void +nvc0_gp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views); +} + +/* ============================= SHADERS ======================================= + */ + /* Create a program object of the given PIPE_SHADER_* type holding a private copy of the TGSI tokens. */ +static void * +nvc0_sp_state_create(struct pipe_context *pipe, + const struct 
pipe_shader_state *cso, unsigned type) +{ + struct nvc0_program *prog; + + prog = CALLOC_STRUCT(nvc0_program); + if (!prog) + return NULL; + + prog->type = type; + prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + /* the token copy is heap allocated and can fail; do not hand out a program without tokens */ + if (!prog->pipe.tokens) { + FREE(prog); + return NULL; + } + + return (void *)prog; +} + /* Destroy a program of any stage: releases what nvc0_program_destroy owns, then the token copy and the object itself. */ +static void +nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_program *prog = (struct nvc0_program *)hwcso; + + nvc0_program_destroy(nvc0_context(pipe), prog); + + FREE((void *)prog->pipe.tokens); + FREE(prog); +} + /* Create a vertex program. */ +static void * +nvc0_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX); +} + /* Make hwcso the current vertex program and flag it dirty. */ +static void +nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->vertprog = hwcso; + nvc0->dirty |= NVC0_NEW_VERTPROG; +} + /* Create a fragment program. */ +static void * +nvc0_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT); +} + /* Make hwcso the current fragment program and flag it dirty. */ +static void +nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->fragprog = hwcso; + nvc0->dirty |= NVC0_NEW_FRAGPROG; +} + /* Create a geometry program. */ +static void * +nvc0_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY); +} + /* Make hwcso the current geometry program and flag it dirty. */ +static void +nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->gmtyprog = hwcso; + nvc0->dirty |= NVC0_NEW_GMTYPROG; +} + /* Bind constant buffer `index` of a shader stage; the PIPE_SHADER_* value is first translated to the driver's stage index. */ +static void +nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct pipe_resource *res) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + switch (shader) { + case PIPE_SHADER_VERTEX: shader = 0; break; + /* + case PIPE_SHADER_TESSELLATION_CONTROL: shader = 1; break; + case PIPE_SHADER_TESSELLATION_EVALUATION: shader = 2; break; + */ + case 
PIPE_SHADER_GEOMETRY: shader = 3; break; + case PIPE_SHADER_FRAGMENT: shader = 4; break; + default: + /* unsupported stage: with asserts compiled out, falling through would index constbuf with the untranslated shader id, so bail out instead */ + assert(0); + return; + } + /* the previously bound buffer is removed from the constant buffer context before the reference is replaced */ + if (nvc0->constbuf[shader][index]) + nvc0_bufctx_del_resident(nvc0, NVC0_BUFCTX_CONSTANT, + nv04_resource(nvc0->constbuf[shader][index])); + + pipe_resource_reference(&nvc0->constbuf[shader][index], res); + + nvc0->constbuf_dirty[shader] |= 1 << index; + + nvc0->dirty |= NVC0_NEW_CONSTBUF; +} + +/* ============================================================================= + */ + /* Store the blend colour and flag it dirty. */ +static void +nvc0_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->blend_colour = *bcol; + nvc0->dirty |= NVC0_NEW_BLEND_COLOUR; +} + /* Store the stencil reference values and flag them dirty. */ +static void +nvc0_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->stencil_ref = *sr; + nvc0->dirty |= NVC0_NEW_STENCIL_REF; +} + /* Copy the first clip->nr user clip planes plus the depth clamp flag and flag the clip state dirty. */ +static void +nvc0_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + const unsigned size = clip->nr * sizeof(clip->ucp[0]); + + memcpy(&nvc0->clip.ucp[0][0], &clip->ucp[0][0], size); + nvc0->clip.nr = clip->nr; + + nvc0->clip.depth_clamp = clip->depth_clamp; + + nvc0->dirty |= NVC0_NEW_CLIP; +} + /* Store the sample mask and flag it dirty. */ +static void +nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->sample_mask = sample_mask; + nvc0->dirty |= NVC0_NEW_SAMPLE_MASK; +} + + /* Store a copy of the framebuffer state and flag it dirty. */ +static void +nvc0_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->framebuffer = *fb; + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; +} + /* Store the polygon stipple pattern and flag it dirty. */ +static void +nvc0_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->stipple = 
*stipple; + nvc0->dirty |= NVC0_NEW_STIPPLE; +} + /* Store the scissor rectangle and flag it dirty. */ +static void +nvc0_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *scissor) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->scissor = *scissor; + nvc0->dirty |= NVC0_NEW_SCISSOR; +} + /* Store the viewport transform and flag it dirty. */ +static void +nvc0_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->viewport = *vpt; + nvc0->dirty |= NVC0_NEW_VIEWPORT; +} + /* Bind `count` vertex buffers: references are taken on the new buffers and dropped on slots beyond count, then the descriptors are copied and the vertex buffer context is reset. */ +static void +nvc0_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + unsigned i; + + for (i = 0; i < count; ++i) + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer); + for (; i < nvc0->num_vtxbufs; ++i) + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL); + + memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count); + nvc0->num_vtxbufs = count; + + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); + + nvc0->dirty |= NVC0_NEW_ARRAYS; +} + /* Bind an index buffer, or release the current one when ib is NULL. */ +static void +nvc0_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + if (ib) { + pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer); + + memcpy(&nvc0->idxbuf, ib, sizeof(nvc0->idxbuf)); + } else { + pipe_resource_reference(&nvc0->idxbuf.buffer, NULL); + } +} + /* Make hwcso the current vertex elements state and flag it dirty. */ +static void +nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->vertex = hwcso; + nvc0->dirty |= NVC0_NEW_VERTEX; +} + /* Create a transform feedback state object: for each of the 4 buffers, the enabled components of every output routed to it are appended to varying_index and counted in varying_count. Returns NULL if the allocation fails. */ +static void * +nvc0_tfb_state_create(struct pipe_context *pipe, + const struct pipe_stream_output_state *pso) +{ + struct nvc0_transform_feedback_state *so; + int n = 0; + int i, c, b; + /* room for up to 4 component entries per output after the struct */ + so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t)); + if (!so) + return NULL; + + for (b = 0; b < 4; ++b) { + /* MALLOC does not zero the object, so the counter must start at 0 before it is incremented in the loop below */ + so->varying_count[b] = 0; + for (i = 0; i < pso->num_outputs; ++i) { + if (pso->output_buffer[i] != 
b) + continue; + for (c = 0; c < 4; ++c) { + if (!(pso->register_mask[i] & (1 << c))) + continue; + so->varying_count[b]++; + so->varying_index[n++] = (pso->register_index[i] << 2) | c; + } + } + so->stride[b] = so->varying_count[b] * 4; + } + if (pso->stride) + so->stride[0] = pso->stride; + + return so; +} + /* Free a transform feedback state object. */ +static void +nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + /* Make hwcso the current transform feedback state and flag it dirty. */ +static void +nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso) +{ + nvc0_context(pipe)->tfb = hwcso; + nvc0_context(pipe)->dirty |= NVC0_NEW_TFB; +} + /* Bind up to 4 transform feedback target buffers with their start offsets; references on slots beyond num_buffers are dropped. */ +static void +nvc0_set_transform_feedback_buffers(struct pipe_context *pipe, + struct pipe_resource **buffers, + int *offsets, + int num_buffers) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + int i; + + assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? */ + + for (i = 0; i < num_buffers; ++i) { + assert(offsets[i] >= 0); + nvc0->tfb_offset[i] = offsets[i]; + pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]); + } + for (; i < nvc0->num_tfbbufs; ++i) + pipe_resource_reference(&nvc0->tfbbuf[i], NULL); + + nvc0->num_tfbbufs = num_buffers; + + nvc0->dirty |= NVC0_NEW_TFB_BUFFERS; +} + /* Install the state handling callbacks of this file (plus a few defined elsewhere) into the context's pipe_context. */ +void +nvc0_init_state_functions(struct nvc0_context *nvc0) +{ + struct pipe_context *pipe = &nvc0->base.pipe; + /* blend */ + pipe->create_blend_state = nvc0_blend_state_create; + pipe->bind_blend_state = nvc0_blend_state_bind; + pipe->delete_blend_state = nvc0_blend_state_delete; + /* rasterizer */ + pipe->create_rasterizer_state = nvc0_rasterizer_state_create; + pipe->bind_rasterizer_state = nvc0_rasterizer_state_bind; + pipe->delete_rasterizer_state = nvc0_rasterizer_state_delete; + /* depth / stencil / alpha */ + pipe->create_depth_stencil_alpha_state = nvc0_zsa_state_create; + pipe->bind_depth_stencil_alpha_state = nvc0_zsa_state_bind; + pipe->delete_depth_stencil_alpha_state = nvc0_zsa_state_delete; + /* sampler states */ + pipe->create_sampler_state = nv50_sampler_state_create; + pipe->delete_sampler_state = nvc0_sampler_state_delete; + pipe->bind_vertex_sampler_states = 
nvc0_vp_sampler_states_bind; + pipe->bind_fragment_sampler_states = nvc0_fp_sampler_states_bind; + pipe->bind_geometry_sampler_states = nvc0_gp_sampler_states_bind; + /* sampler views */ + pipe->create_sampler_view = nvc0_create_sampler_view; + pipe->sampler_view_destroy = nvc0_sampler_view_destroy; + pipe->set_vertex_sampler_views = nvc0_vp_set_sampler_views; + pipe->set_fragment_sampler_views = nvc0_fp_set_sampler_views; + pipe->set_geometry_sampler_views = nvc0_gp_set_sampler_views; + /* shader programs: all three stages share one delete hook */ + pipe->create_vs_state = nvc0_vp_state_create; + pipe->create_fs_state = nvc0_fp_state_create; + pipe->create_gs_state = nvc0_gp_state_create; + pipe->bind_vs_state = nvc0_vp_state_bind; + pipe->bind_fs_state = nvc0_fp_state_bind; + pipe->bind_gs_state = nvc0_gp_state_bind; + pipe->delete_vs_state = nvc0_sp_state_delete; + pipe->delete_fs_state = nvc0_sp_state_delete; + pipe->delete_gs_state = nvc0_sp_state_delete; + /* plain parameter setters */ + pipe->set_blend_color = nvc0_set_blend_color; + pipe->set_stencil_ref = nvc0_set_stencil_ref; + pipe->set_clip_state = nvc0_set_clip_state; + pipe->set_sample_mask = nvc0_set_sample_mask; + pipe->set_constant_buffer = nvc0_set_constant_buffer; + pipe->set_framebuffer_state = nvc0_set_framebuffer_state; + pipe->set_polygon_stipple = nvc0_set_polygon_stipple; + pipe->set_scissor_state = nvc0_set_scissor_state; + pipe->set_viewport_state = nvc0_set_viewport_state; + /* vertex elements */ + pipe->create_vertex_elements_state = nvc0_vertex_state_create; + pipe->delete_vertex_elements_state = nvc0_vertex_state_delete; + pipe->bind_vertex_elements_state = nvc0_vertex_state_bind; + /* vertex and index buffers */ + pipe->set_vertex_buffers = nvc0_set_vertex_buffers; + pipe->set_index_buffer = nvc0_set_index_buffer; + /* transform feedback */ + pipe->create_stream_output_state = nvc0_tfb_state_create; + pipe->delete_stream_output_state = nvc0_tfb_state_delete; + pipe->bind_stream_output_state = nvc0_tfb_state_bind; + pipe->set_stream_output_buffers = nvc0_set_transform_feedback_buffers; + + pipe->redefine_user_buffer = u_default_redefine_user_buffer; +} + +diff --git 
a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c new file mode 100644 index 0000000000..bb81480bab --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -0,0 +1,483 @@ + +#include "nvc0_context.h" +#include "os/os_time.h" + /* Emit the ZCULL setup for the current depth/stencil surface. fb->zsbuf is dereferenced unconditionally, so the caller must only use this with a zsbuf bound. Most methods are raw offsets whose meaning is only known from the inline notes. */ +static void +nvc0_validate_zcull(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + struct nvc0_surface *sf = nvc0_surface(fb->zsbuf); + struct nvc0_miptree *mt = nvc0_miptree(sf->base.texture); + struct nouveau_bo *bo = mt->base.bo; + uint32_t size; + uint32_t offset = align(mt->total_size, 1 << 17); + unsigned width, height; + + assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2); + + size = mt->total_size * 2; + /* height is aligned to 32, width is rounded up to the next multiple of 224 */ + height = align(fb->height, 32); + width = fb->width % 224; + if (width) + width = fb->width + (224 - width); + else + width = fb->width; + + MARK_RING (chan, 23, 4); + BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */ + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */ + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + offset += 1 << 17; + BEGIN_RING(chan, RING_3D_(0x07f0), 2); /* ZCULL_ADDRESS_B_HIGH */ + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + BEGIN_RING(chan, RING_3D_(0x07e0), 2); + OUT_RING (chan, size); + OUT_RING (chan, size >> 16); + BEGIN_RING(chan, RING_3D_(0x15c8), 1); /* bits 0x3 */ + OUT_RING (chan, 2); + BEGIN_RING(chan, RING_3D_(0x07c0), 4); /* ZCULL dimensions */ + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x15fc), 2); + OUT_RING (chan, 0); /* bits 0xffff */ + OUT_RING (chan, 0); /* bits 0xffff */ + BEGIN_RING(chan, 
RING_3D_(0x1958), 1); + OUT_RING (chan, 0); /* bits ~0 */ +} + /* Emit the framebuffer setup: render target control and screen scissor, one 9-word block per colour buffer, then the depth/stencil buffer (or ZETA_ENABLE = 0). A SERIALIZE is emitted at the end if any attached miptree was flagged as being read by the GPU. */ +static void +nvc0_validate_fb(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + unsigned i; + boolean serialize = FALSE; + + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME); + /* 076543210 is an octal literal, i.e. one 3-bit digit per slot - presumably an identity render target mapping, confirm against the RT_CONTROL definition */ + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); + OUT_RING (chan, (076543210 << 4) | fb->nr_cbufs); + BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2); + OUT_RING (chan, fb->width << 16); + OUT_RING (chan, fb->height << 16); + + MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs); + + for (i = 0; i < fb->nr_cbufs; ++i) { + struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture); + struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]); + struct nouveau_bo *bo = mt->base.bo; + uint32_t offset = sf->offset; + + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 9); + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, nvc0_format_table[sf->base.format].rt); + OUT_RING (chan, (mt->layout_3d << 16) | + mt->level[sf->base.u.tex.level].tile_mode); + OUT_RING (chan, sf->base.u.tex.first_layer + sf->depth); + OUT_RING (chan, mt->layer_stride >> 2); + OUT_RING (chan, sf->base.u.tex.first_layer); + /* a pending GPU read of this miptree requests serialization; from now on it counts as being written */ + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; + + /* only register for writing, otherwise we'd always serialize here */ + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } + + if (fb->zsbuf) { + struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture); + struct nvc0_surface *sf = nvc0_surface(fb->zsbuf); + struct nouveau_bo *bo = mt->base.bo; + int unk = mt->base.base.target == PIPE_TEXTURE_2D; + 
uint32_t offset = sf->offset; + + MARK_RING (chan, 12, 2); + BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, nvc0_format_table[fb->zsbuf->format].rt); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); + OUT_RING (chan, mt->layer_stride >> 2); + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, (unk << 16) | + (sf->base.u.tex.first_layer + sf->depth)); + BEGIN_RING(chan, RING_3D(ZETA_BASE_LAYER), 1); + OUT_RING (chan, sf->base.u.tex.first_layer); + /* same read/write status handling as for the colour buffers */ + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; + + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } else { + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); + OUT_RING (chan, 0); + } + + if (serialize) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); + } +} + /* Emit the four blend colour components as floats. */ +static void +nvc0_validate_blend_colour(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4); + OUT_RINGf (chan, nvc0->blend_colour.color[0]); + OUT_RINGf (chan, nvc0->blend_colour.color[1]); + OUT_RINGf (chan, nvc0->blend_colour.color[2]); + OUT_RINGf (chan, nvc0->blend_colour.color[3]); +} + /* Emit the front (ref_value[0]) and back (ref_value[1]) stencil reference values. */ +static void +nvc0_validate_stencil_ref(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1); + OUT_RING (chan, nvc0->stencil_ref.ref_value[0]); + BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1); + OUT_RING (chan, nvc0->stencil_ref.ref_value[1]); +} + +static void 
+nvc0_validate_stipple(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   unsigned i;
+
+   BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
+   for (i = 0; i < 32; ++i)
+      OUT_RING(chan, util_bswap32(nvc0->stipple.stipple[i]));
+}
+
+/* Emit the scissor rectangle for slot 0.
+ * Nothing is emitted when the scissor state itself is not dirty and the
+ * rasterizer's scissor enable matches what was last written; with the
+ * enable off, the full 0..0xffff range is programmed instead.
+ */
+static void
+nvc0_validate_scissor(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct pipe_scissor_state *s = &nvc0->scissor;
+
+   if (!(nvc0->dirty & NVC0_NEW_SCISSOR) &&
+       nvc0->rast->pipe.scissor == nvc0->state.scissor)
+      return;
+   nvc0->state.scissor = nvc0->rast->pipe.scissor;
+
+   BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+   if (nvc0->rast->pipe.scissor) {
+      OUT_RING(chan, (s->maxx << 16) | s->minx);
+      OUT_RING(chan, (s->maxy << 16) | s->miny);
+   } else {
+      OUT_RING(chan, (0xffff << 16) | 0);
+      OUT_RING(chan, (0xffff << 16) | 0);
+   }
+}
+
+/* Emit viewport transform, viewport rectangle and depth range for slot 0.
+ *
+ * The rectangle is derived from the transform: origin = translate - |scale|,
+ * extent = 2 * |scale|.  Origin and extent share one 32-bit word each
+ * (extent in the upper 16 bits, origin in the lower 16), so the origin must
+ * not be negative: a negative int would sign-extend into the extent field.
+ * A viewport starting left of / above the render target is therefore clipped
+ * to start at 0, and its extent is reduced by the clipped amount.
+ */
+static void
+nvc0_validate_viewport(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct pipe_viewport_state *vp = &nvc0->viewport;
+   int x, y, w, h;
+   float zmin, zmax;
+
+   BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3);
+   OUT_RINGf (chan, vp->translate[0]);
+   OUT_RINGf (chan, vp->translate[1]);
+   OUT_RINGf (chan, vp->translate[2]);
+   BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3);
+   OUT_RINGf (chan, vp->scale[0]);
+   OUT_RINGf (chan, vp->scale[1]);
+   OUT_RINGf (chan, vp->scale[2]);
+
+   /* now set the viewport rectangle to viewport dimensions for clipping */
+
+   x = (int)(vp->translate[0] - fabsf(vp->scale[0]));
+   y = (int)(vp->translate[1] - fabsf(vp->scale[1]));
+   w = (int)fabsf(2.0f * vp->scale[0]);
+   h = (int)fabsf(2.0f * vp->scale[1]);
+
+   /* clip a negative origin to 0 so it cannot corrupt the packed extent */
+   if (x < 0) {
+      w += x;
+      x = 0;
+   }
+   if (y < 0) {
+      h += y;
+      y = 0;
+   }
+   /* viewport lies entirely outside: keep the extent non-negative */
+   if (w < 0)
+      w = 0;
+   if (h < 0)
+      h = 0;
+
+   zmin = vp->translate[2] - fabsf(vp->scale[2]);
+   zmax = vp->translate[2] + fabsf(vp->scale[2]);
+
+   BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+   OUT_RING (chan, (w << 16) | x);
+   OUT_RING (chan, (h << 16) | y);
+   BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
+   OUT_RINGf (chan, zmin);
+
OUT_RINGf (chan, zmax); +} + +static void +nvc0_validate_clip(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + uint32_t clip; + + if (nvc0->clip.depth_clamp) { + clip = + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2; + } else { + clip = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1; + } + + BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); + OUT_RING (chan, clip); + + if (nvc0->clip.nr) { + struct nouveau_bo *bo = nvc0->screen->uniforms; + + MARK_RING (chan, 6 + nvc0->clip.nr * 4, 2); + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + OUT_RING (chan, 256); + OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING_1I(chan, RING_3D(CB_POS), nvc0->clip.nr * 4 + 1); + OUT_RING (chan, 0); + OUT_RINGp (chan, &nvc0->clip.ucp[0][0], nvc0->clip.nr * 4); + + BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1); + OUT_RING (chan, (1 << nvc0->clip.nr) - 1); + } else { + IMMED_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 0); + } +} + +static void +nvc0_validate_blend(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + WAIT_RING(chan, nvc0->blend->size); + OUT_RINGp(chan, nvc0->blend->state, nvc0->blend->size); +} + +static void +nvc0_validate_zsa(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + WAIT_RING(chan, nvc0->zsa->size); + OUT_RINGp(chan, nvc0->zsa->state, nvc0->zsa->size); +} + +static void +nvc0_validate_rasterizer(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + WAIT_RING(chan, nvc0->rast->size); + OUT_RINGp(chan, nvc0->rast->state, nvc0->rast->size); +} + +static void +nvc0_constbufs_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = 
nvc0->screen->base.channel; + struct nouveau_bo *bo; + unsigned s; + + for (s = 0; s < 5; ++s) { + struct nv04_resource *res; + int i; + + while (nvc0->constbuf_dirty[s]) { + unsigned base = 0; + unsigned offset = 0, words = 0; + boolean rebind = TRUE; + + i = ffs(nvc0->constbuf_dirty[s]) - 1; + nvc0->constbuf_dirty[s] &= ~(1 << i); + + res = nv04_resource(nvc0->constbuf[s][i]); + if (!res) { + BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1); + OUT_RING (chan, (i << 4) | 0); + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = 0; + continue; + } + + if (!nouveau_resource_mapped_by_gpu(&res->base)) { + if (i == 0) { + base = s << 16; + bo = nvc0->screen->uniforms; + + if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0) + rebind = FALSE; + else + nvc0->state.uniform_buffer_bound[s] = + align(res->base.width0, 0x100); + } else { + bo = res->bo; + } +#if 0 + nvc0_m2mf_push_linear(nvc0, bo, NOUVEAU_BO_VRAM, + base, res->base.width0, res->data); + BEGIN_RING(chan, RING_3D_(0x021c), 1); + OUT_RING (chan, 0x1111); +#else + words = res->base.width0 / 4; +#endif + } else { + bo = res->bo; + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = 0; + } + + if (bo != nvc0->screen->uniforms) + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_CONSTANT, res, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + + if (rebind) { + MARK_RING (chan, 4, 2); + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + OUT_RING (chan, align(res->base.width0, 0x100)); + OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1); + OUT_RING (chan, (i << 4) | 1); + } + + while (words) { + unsigned nr = AVAIL_RING(chan); + + if (nr < 16) { + FIRE_RING(chan); + continue; + } + nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1); + + MARK_RING (chan, nr + 5, 2); + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + OUT_RING (chan, align(res->base.width0, 0x100)); + OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + 
OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING_1I(chan, RING_3D(CB_POS), nr + 1); + OUT_RING (chan, offset); + OUT_RINGp (chan, &res->data[offset], nr); + + offset += nr * 4; + words -= nr; + } + } + } +} + +static void +nvc0_validate_derived_1(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + boolean early_z; + + early_z = nvc0->fragprog->fp.early_z && !nvc0->zsa->pipe.alpha.enabled; + + if (early_z != nvc0->state.early_z) { + nvc0->state.early_z = early_z; + IMMED_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), early_z); + } +} + +static void +nvc0_switch_pipe_context(struct nvc0_context *ctx_to) +{ + struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx; + + if (ctx_from) + ctx_to->state = ctx_from->state; + + ctx_to->dirty = ~0; + + if (!ctx_to->vertex) + ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS); + + if (!ctx_to->vertprog) + ctx_to->dirty &= ~NVC0_NEW_VERTPROG; + if (!ctx_to->fragprog) + ctx_to->dirty &= ~NVC0_NEW_FRAGPROG; + + if (!ctx_to->blend) + ctx_to->dirty &= ~NVC0_NEW_BLEND; + if (!ctx_to->rast) + ctx_to->dirty &= ~NVC0_NEW_RASTERIZER; + if (!ctx_to->zsa) + ctx_to->dirty &= ~NVC0_NEW_ZSA; + + ctx_to->screen->base.channel->user_private = ctx_to->screen->cur_ctx = + ctx_to; +} + +static struct state_validate { + void (*func)(struct nvc0_context *); + uint32_t states; +} validate_list[] = { + { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER }, + { nvc0_validate_blend, NVC0_NEW_BLEND }, + { nvc0_validate_zsa, NVC0_NEW_ZSA }, + { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER }, + { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, + { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, + { nvc0_validate_stipple, NVC0_NEW_STIPPLE }, + { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER }, + { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, + { nvc0_validate_clip, NVC0_NEW_CLIP }, + { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, + { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG }, + 
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, + { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, + { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA }, + { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, + { nvc0_validate_textures, NVC0_NEW_TEXTURES }, + { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, + { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, + { nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS } +}; +#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) + +boolean +nvc0_state_validate(struct nvc0_context *nvc0) +{ + unsigned i; + + if (nvc0->screen->cur_ctx != nvc0) + nvc0_switch_pipe_context(nvc0); + + if (nvc0->dirty) { + for (i = 0; i < validate_list_len; ++i) { + struct state_validate *validate = &validate_list[i]; + + if (nvc0->dirty & validate->states) + validate->func(nvc0); + } + nvc0->dirty = 0; + } + + nvc0_bufctx_emit_relocs(nvc0); + + return TRUE; +} diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h new file mode 100644 index 0000000000..8222f9375e --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -0,0 +1,60 @@ + +#ifndef __NVC0_STATEOBJ_H__ +#define __NVC0_STATEOBJ_H__ + +#include "pipe/p_state.h" + +#define SB_BEGIN_3D(so, m, s) \ + (so)->state[(so)->size++] = \ + (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) + +#define SB_IMMED_3D(so, m, d) \ + (so)->state[(so)->size++] = \ + (0x8 << 28) | ((d) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) + +#define SB_DATA(so, u) (so)->state[(so)->size++] = (u) + +#include "nv50/nv50_stateobj_tex.h" + +struct nvc0_blend_stateobj { + struct pipe_blend_state pipe; + int size; + uint32_t state[72]; +}; + +struct nvc0_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + int size; + uint32_t state[36]; +}; + +struct nvc0_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + int size; + uint32_t 
state[29]; +}; + +struct nvc0_vertex_element { + struct pipe_vertex_element pipe; + uint32_t state; +}; + +struct nvc0_vertex_stateobj { + struct translate *translate; + unsigned num_elements; + uint32_t instance_elts; + uint32_t instance_bufs; + boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */ + unsigned vtx_size; + unsigned vtx_per_packet_max; + struct nvc0_vertex_element element[0]; +}; + +/* will have to lookup index -> location qualifier from nvc0_program */ +struct nvc0_transform_feedback_state { + uint32_t stride[4]; + uint8_t varying_count[4]; + uint8_t varying_index[0]; +}; + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c new file mode 100644 index 0000000000..fc5f45ea25 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -0,0 +1,451 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <stdint.h> + +#include "pipe/p_defines.h" + +#include "util/u_inlines.h" +#include "util/u_pack_color.h" +#include "util/u_format.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" +#include "nvc0_transfer.h" + +#include "nv50/nv50_defs.xml.h" + +#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL + +/* return TRUE for formats that can be converted among each other by NVC0_2D */ +static INLINE boolean +nvc0_2d_format_faithful(enum pipe_format format) +{ + uint8_t id = nvc0_format_table[format].rt; + + return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); +} + +static INLINE uint8_t +nvc0_2d_format(enum pipe_format format) +{ + uint8_t id = nvc0_format_table[format].rt; + + /* Hardware values for color formats range from 0xc0 to 0xff, + * but the 2D engine doesn't support all of them. + */ + if (nvc0_2d_format_faithful(format)) + return id; + + switch (util_format_get_blocksize(format)) { + case 1: + return NV50_SURFACE_FORMAT_R8_UNORM; + case 2: + return NV50_SURFACE_FORMAT_R16_UNORM; + case 4: + return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM; + case 8: + return NV50_SURFACE_FORMAT_R16G16B16A16_UNORM; + case 16: + return NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT; + default: + return 0; + } +} + +static int +nvc0_2d_texture_set(struct nouveau_channel *chan, int dst, + struct nvc0_miptree *mt, unsigned level, unsigned layer) +{ + struct nouveau_bo *bo = mt->base.bo; + uint32_t width, height, depth; + uint32_t format; + uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT; + uint32_t flags = mt->base.domain | (dst ? 
NOUVEAU_BO_WR : NOUVEAU_BO_RD); + uint32_t offset = mt->level[level].offset; + + format = nvc0_2d_format(mt->base.base.format); + if (!format) { + NOUVEAU_ERR("invalid/unsupported surface format: %s\n", + util_format_name(mt->base.base.format)); + return 1; + } + + width = u_minify(mt->base.base.width0, level); + height = u_minify(mt->base.base.height0, level); + depth = u_minify(mt->base.base.depth0, level); + + /* layer has to be < depth, and depth > tile depth / 2 */ + + if (!mt->layout_3d) { + offset += mt->layer_stride * layer; + layer = 0; + depth = 1; + } else + if (!dst) { + offset += nvc0_miptree_zslice_offset(mt, level, layer); + layer = 0; + } + + if (!(bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)) { + BEGIN_RING(chan, RING_2D_(mthd), 2); + OUT_RING (chan, format); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5); + OUT_RING (chan, mt->level[level].pitch); + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RELOCh(chan, bo, offset, flags); + OUT_RELOCl(chan, bo, offset, flags); + } else { + BEGIN_RING(chan, RING_2D_(mthd), 5); + OUT_RING (chan, format); + OUT_RING (chan, 0); + OUT_RING (chan, mt->level[level].tile_mode); + OUT_RING (chan, depth); + OUT_RING (chan, layer); + BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4); + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RELOCh(chan, bo, offset, flags); + OUT_RELOCl(chan, bo, offset, flags); + } + +#if 0 + if (dst) { + BEGIN_RING(chan, RING_2D_(NVC0_2D_CLIP_X), 4); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, width); + OUT_RING (chan, height); + } +#endif + return 0; +} + +static int +nvc0_2d_texture_do_copy(struct nouveau_channel *chan, + struct nvc0_miptree *dst, unsigned dst_level, + unsigned dx, unsigned dy, unsigned dz, + struct nvc0_miptree *src, unsigned src_level, + unsigned sx, unsigned sy, unsigned sz, + unsigned w, unsigned h) +{ + int ret; + + ret = MARK_RING(chan, 2 * 16 + 32, 4); + if (ret) + return ret; + + ret = nvc0_2d_texture_set(chan, 1, 
dst, dst_level, dz); + if (ret) + return ret; + + ret = nvc0_2d_texture_set(chan, 0, src, src_level, sz); + if (ret) + return ret; + + /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */ + BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4); + OUT_RING (chan, dx); + OUT_RING (chan, dy); + OUT_RING (chan, w); + OUT_RING (chan, h); + BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4); + OUT_RING (chan, 0); + OUT_RING (chan, sx); + OUT_RING (chan, 0); + OUT_RING (chan, sy); + + return 0; +} + +static void +nvc0_setup_m2mf_rect(struct nvc0_m2mf_rect *rect, + struct pipe_resource *restrict res, unsigned l, + unsigned x, unsigned y, unsigned z) +{ + struct nvc0_miptree *mt = nvc0_miptree(res); + const unsigned w = u_minify(res->width0, l); + const unsigned h = u_minify(res->height0, l); + + rect->bo = mt->base.bo; + rect->domain = mt->base.domain; + rect->base = mt->level[l].offset; + rect->pitch = mt->level[l].pitch; + if (util_format_is_plain(res->format)) { + rect->width = w; + rect->height = h; + rect->x = x; + rect->y = y; + } else { + rect->width = util_format_get_nblocksx(res->format, w); + rect->height = util_format_get_nblocksy(res->format, h); + rect->x = util_format_get_nblocksx(res->format, x); + rect->y = util_format_get_nblocksy(res->format, y); + } + rect->tile_mode = mt->level[l].tile_mode; + rect->cpp = util_format_get_blocksize(res->format); + + if (mt->layout_3d) { + rect->z = z; + rect->depth = u_minify(res->depth0, l); + } else { + rect->base += z * mt->layer_stride; + rect->z = 0; + rect->depth = 1; + } +} + +static void +nvc0_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct 
nvc0_screen *screen = nvc0_context(pipe)->screen; + int ret; + unsigned dst_layer = dstz, src_layer = src_box->z; + + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + + if (src->format == dst->format) { + struct nvc0_m2mf_rect drect, srect; + unsigned i; + unsigned nx = util_format_get_nblocksx(src->format, src_box->width); + unsigned ny = util_format_get_nblocksy(src->format, src_box->height); + + nvc0_setup_m2mf_rect(&drect, dst, dst_level, dstx, dsty, dstz); + nvc0_setup_m2mf_rect(&srect, src, src_level, + src_box->x, src_box->y, src_box->z); + + for (i = 0; i < src_box->depth; ++i) { + nvc0_m2mf_transfer_rect(&screen->base.base, &drect, &srect, nx, ny); + + if (nvc0_miptree(dst)->layout_3d) + drect.z++; + else + drect.base += nvc0_miptree(dst)->layer_stride; + + if (nvc0_miptree(src)->layout_3d) + srect.z++; + else + srect.base += nvc0_miptree(src)->layer_stride; + } + return; + } + + assert(nvc0_2d_format_faithful(src->format)); + assert(nvc0_2d_format_faithful(dst->format)); + + for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { + ret = nvc0_2d_texture_do_copy(screen->base.channel, + nvc0_miptree(dst), dst_level, + dstx, dsty, dst_layer, + nvc0_miptree(src), src_level, + src_box->x, src_box->y, src_layer, + src_box->width, src_box->height); + if (ret) + return; + } +} + +static void +nvc0_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nvc0_context *nv50 = nvc0_context(pipe); + struct nvc0_screen *screen = nv50->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nvc0_miptree *mt = nvc0_miptree(dst->texture); + struct nvc0_surface *sf = nvc0_surface(dst); + struct nouveau_bo *bo = mt->base.bo; + + BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4); + OUT_RINGf (chan, rgba[0]); + OUT_RINGf (chan, rgba[1]); + OUT_RINGf (chan, rgba[2]); + OUT_RINGf (chan, rgba[3]); + + if 
(MARK_RING(chan, 18, 2)) + return; + + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 9); + OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, nvc0_format_table[dst->format].rt); + OUT_RING (chan, (mt->layout_3d << 16) | + mt->level[sf->base.u.tex.level].tile_mode); + OUT_RING (chan, dst->u.tex.first_layer + sf->depth); + OUT_RING (chan, mt->layer_stride >> 2); + OUT_RING (chan, dst->u.tex.first_layer); + + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2); + OUT_RING (chan, ((dstx + width) << 16) | dstx); + OUT_RING (chan, ((dsty + height) << 16) | dsty); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1); + + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, 0x3c); + + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0); + + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +static void +nvc0_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nvc0_context *nv50 = nvc0_context(pipe); + struct nvc0_screen *screen = nv50->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nvc0_miptree *mt = nvc0_miptree(dst->texture); + struct nvc0_surface *sf = nvc0_surface(dst); + struct nouveau_bo *bo = mt->base.bo; + uint32_t mode = 0; + int unk = mt->base.base.target == PIPE_TEXTURE_2D; + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1); + OUT_RINGf (chan, depth); + mode |= NVC0_3D_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1); + OUT_RING (chan, stencil & 0xff); + mode |= NVC0_3D_CLEAR_BUFFERS_S; + } + + if (MARK_RING(chan, 17, 2)) + return; + + BEGIN_RING(chan, 
RING_3D(ZETA_ADDRESS_HIGH), 5); + OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, nvc0_format_table[dst->format].rt); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); + OUT_RING (chan, mt->layer_stride >> 2); + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, (unk << 16) | (dst->u.tex.first_layer + sf->depth)); + BEGIN_RING(chan, RING_3D(ZETA_BASE_LAYER), 1); + OUT_RING (chan, dst->u.tex.first_layer); + + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2); + OUT_RING (chan, ((dstx + width) << 16) | dstx); + OUT_RING (chan, ((dsty + height) << 16) | dsty); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1); + + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, mode); + + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0); + + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +void +nvc0_clear(struct pipe_context *pipe, unsigned buffers, + const float *rgba, double depth, unsigned stencil) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + unsigned i; + const unsigned dirty = nvc0->dirty; + uint32_t mode = 0; + + /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ + nvc0->dirty &= NVC0_NEW_FRAMEBUFFER; + if (!nvc0_state_validate(nvc0)) + return; + + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { + BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4); + OUT_RINGf (chan, rgba[0]); + OUT_RINGf (chan, rgba[1]); + OUT_RINGf (chan, rgba[2]); + OUT_RINGf (chan, rgba[3]); + mode = + NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G | + NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A; + } + + if (buffers & PIPE_CLEAR_DEPTH) { + BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1); + OUT_RING (chan, 
fui(depth)); + mode |= NVC0_3D_CLEAR_BUFFERS_Z; + } + + if (buffers & PIPE_CLEAR_STENCIL) { + BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1); + OUT_RING (chan, stencil & 0xff); + mode |= NVC0_3D_CLEAR_BUFFERS_S; + } + + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, mode); + + for (i = 1; i < fb->nr_cbufs; i++) { + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, (i << 6) | 0x3c); + } + + nvc0->dirty = dirty & ~NVC0_NEW_FRAMEBUFFER; +} + +void +nvc0_init_surface_functions(struct nvc0_context *nvc0) +{ + struct pipe_context *pipe = &nvc0->base.pipe; + + pipe->resource_copy_region = nvc0_resource_copy_region; + pipe->clear_render_target = nvc0_clear_render_target; + pipe->clear_depth_stencil = nvc0_clear_depth_stencil; +} + + diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c new file mode 100644 index 0000000000..24850b1998 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -0,0 +1,302 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_context.h" +#include "nvc0_resource.h" +#include "nv50/nv50_texture.xml.h" + +#include "util/u_format.h" + +#define NV50_TIC_0_SWIZZLE__MASK \ + (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ + NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) + +static INLINE uint32_t +nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int) +{ + switch (swz) { + case PIPE_SWIZZLE_RED: + return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; + case PIPE_SWIZZLE_GREEN: + return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; + case PIPE_SWIZZLE_BLUE: + return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; + case PIPE_SWIZZLE_ALPHA: + return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + case PIPE_SWIZZLE_ONE: + return tex_int ? 
NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT;
+   case PIPE_SWIZZLE_ZERO:
+   default:
+      return NV50_TIC_MAP_ZERO;
+   }
+}
+
+/* Create a sampler view and pre-compute its 8-word TIC entry.
+ *
+ * pipe:    context that will own the view
+ * texture: resource to sample from; the view takes its own reference
+ * templ:   template supplying format, swizzle and level/layer range
+ *
+ * Returns the new view, or NULL if allocation fails or the texture target
+ * is not supported.  On failure nothing is leaked: the texture reference
+ * taken here is dropped again and the entry is freed.
+ * view->id is initialised to -1; nvc0_validate_tic assigns the id via
+ * nvc0_screen_tic_alloc when it finds it negative.
+ */
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *pipe,
+                         struct pipe_resource *texture,
+                         const struct pipe_sampler_view *templ)
+{
+   const struct util_format_description *desc;
+   uint32_t *tic;
+   uint32_t swz[4];
+   uint32_t depth;
+   struct nv50_tic_entry *view;
+   struct nvc0_miptree *mt = nvc0_miptree(texture);
+   boolean tex_int;
+
+   view = MALLOC_STRUCT(nv50_tic_entry);
+   if (!view)
+      return NULL;
+
+   view->pipe = *templ;
+   view->pipe.reference.count = 1;
+   view->pipe.texture = NULL;
+   view->pipe.context = pipe;
+
+   view->id = -1;
+
+   pipe_resource_reference(&view->pipe.texture, texture);
+
+   tic = &view->tic[0];
+
+   desc = util_format_description(view->pipe.format);
+
+   /* TIC[0] */
+
+   tic[0] = nvc0_format_table[view->pipe.format].tic;
+
+   tex_int = FALSE; /* XXX: integer textures */
+
+   /* replace the format's default component mapping by the view's swizzle */
+   swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
+   swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
+   swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
+   swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
+   tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
+      (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
+      (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
+      (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
+      (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+
+   tic[1] = /* mt->base.bo->offset; */ 0;
+   tic[2] = /* mt->base.bo->offset >> 32 */ 0;
+
+   tic[2] |= 0x10001000 | NV50_TIC_2_NO_BORDER;
+
+   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+      tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+
+   if (mt->base.base.target != PIPE_TEXTURE_RECT)
+      tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+
+   tic[2] |=
+      ((mt->base.bo->tile_mode & 0x0f0) << (22 - 4)) |
+      ((mt->base.bo->tile_mode & 0xf00) << (25 - 8));
+
+   depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
+
+   if (mt->base.base.target == PIPE_TEXTURE_1D_ARRAY ||
+       mt->base.base.target == PIPE_TEXTURE_2D_ARRAY) {
+      /* there doesn't seem to be a base layer field in TIC */
+      tic[1] = view->pipe.u.tex.first_layer * mt->layer_stride;
+      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
+   }
+
+   switch (mt->base.base.target) {
+   case PIPE_TEXTURE_1D:
+      tic[2] |= NV50_TIC_2_TARGET_1D;
+      break;
+   case PIPE_TEXTURE_2D:
+      tic[2] |= NV50_TIC_2_TARGET_2D;
+      break;
+   case PIPE_TEXTURE_RECT:
+      tic[2] |= NV50_TIC_2_TARGET_RECT;
+      break;
+   case PIPE_TEXTURE_3D:
+      tic[2] |= NV50_TIC_2_TARGET_3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+      depth /= 6;
+      if (depth > 1)
+         tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+      else
+         tic[2] |= NV50_TIC_2_TARGET_CUBE;
+      break;
+   case PIPE_TEXTURE_1D_ARRAY:
+      tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+      break;
+   case PIPE_TEXTURE_2D_ARRAY:
+      tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+      break;
+   case PIPE_BUFFER:
+      tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR;
+      break;
+   default:
+      NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
+      /* undo the reference and the allocation made above */
+      pipe_resource_reference(&view->pipe.texture, NULL);
+      FREE(view);
+      return NULL;
+   }
+
+   if (mt->base.base.target == PIPE_BUFFER)
+      tic[3] = mt->base.base.width0;
+   else
+      tic[3] = 0x00300000;
+
+   /* unsigned constant: shifting 1 into bit 31 of a signed int is undefined */
+   tic[4] = (1u << 31) | mt->base.base.width0;
+
+   tic[5] = mt->base.base.height0 & 0xffff;
+   tic[5] |= depth << 16;
+   tic[5] |= mt->base.base.last_level << 28;
+
+   tic[6] = 0x03000000;
+
+   tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+
+   return &view->pipe;
+}
+
+static boolean
+nvc0_validate_tic(struct nvc0_context *nvc0, int s)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nouveau_bo *txc = nvc0->screen->txc;
+   unsigned i;
+   boolean need_flush = FALSE;
+
+   for (i = 0; i < nvc0->num_textures[s]; ++i) {
+      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+      struct nv04_resource *res;
+
+      if (!tic) {
+         BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+         OUT_RING (chan, (i << 1) | 0);
+         continue;
+      }
+      res = &nvc0_miptree(tic->pipe.texture)->base;
+
+      if (tic->id <
0) { + uint32_t offset = tic->tic[1]; + + tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); + + MARK_RING (chan, 9 + 8, 4); + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); + OUT_RELOCh(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); + OUT_RING (chan, 32); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_MF(EXEC), 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, RING_MF(DATA), 8); + OUT_RING (chan, tic->tic[0]); + OUT_RELOCl(chan, res->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOC (chan, res->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, tic->tic[2], tic->tic[2]); + OUT_RINGp (chan, &tic->tic[3], 5); + + need_flush = TRUE; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); + OUT_RING (chan, (tic->id << 4) | 1); + } + nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); + OUT_RING (chan, (tic->id << 9) | (i << 1) | 1); + } + for (; i < nvc0->state.num_textures[s]; ++i) { + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); + OUT_RING (chan, (i << 1) | 0); + } + nvc0->state.num_textures[s] = nvc0->num_textures[s]; + + return need_flush; +} + +void nvc0_validate_textures(struct nvc0_context *nvc0) +{ + boolean need_flush; + + need_flush = nvc0_validate_tic(nvc0, 0); + need_flush |= nvc0_validate_tic(nvc0, 4); + + if (need_flush) { + BEGIN_RING(nvc0->screen->base.channel, RING_3D(TIC_FLUSH), 1); + OUT_RING (nvc0->screen->base.channel, 0); + } +} + +static boolean +nvc0_validate_tsc(struct nvc0_context *nvc0, int s) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + 
unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_samplers[s]; ++i) { + struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]); + + if (!tsc) { + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); + OUT_RING (chan, (i << 4) | 0); + continue; + } + if (tsc->id < 0) { + tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); + + nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc, + 65536 + tsc->id * 32, NOUVEAU_BO_VRAM, + 32, tsc->tsc); + need_flush = TRUE; + } + nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); + OUT_RING (chan, (tsc->id << 12) | (i << 4) | 1); + } + for (; i < nvc0->state.num_samplers[s]; ++i) { + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); + OUT_RING (chan, (i << 4) | 0); + } + nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; + + return need_flush; +} + +void nvc0_validate_samplers(struct nvc0_context *nvc0) +{ + boolean need_flush; + + need_flush = nvc0_validate_tsc(nvc0, 0); + need_flush |= nvc0_validate_tsc(nvc0, 4); + + if (need_flush) { + BEGIN_RING(nvc0->screen->base.channel, RING_3D(TSC_FLUSH), 1); + OUT_RING (nvc0->screen->base.channel, 0); + } +} diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c new file mode 100644 index 0000000000..a44d330c73 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -0,0 +1,2023 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * 
all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <unistd.h> + +#define NOUVEAU_DEBUG 1 + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" +#include "util/u_dynarray.h" + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +/* Arbitrary internal limits. */ +#define BLD_MAX_TEMPS 64 +#define BLD_MAX_ADDRS 4 +#define BLD_MAX_PREDS 4 +#define BLD_MAX_IMMDS 128 +#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS + +#define BLD_MAX_COND_NESTING 8 +#define BLD_MAX_LOOP_NESTING 4 +#define BLD_MAX_CALL_NESTING 2 + +/* This structure represents a TGSI register. 
*/ +struct bld_register { + struct nv_value *current; + /* collect all SSA values assigned to it */ + struct util_dynarray vals; + /* 1 bit per loop level, indicates if used/defd, reset when loop ends */ + uint16_t loop_use; + uint16_t loop_def; +}; + +static INLINE struct nv_value ** +bld_register_access(struct bld_register *reg, unsigned i) +{ + return util_dynarray_element(®->vals, struct nv_value *, i); +} + +static INLINE void +bld_register_add_val(struct bld_register *reg, struct nv_value *val) +{ + struct nv_basic_block *bb = val->insn->bb; + + if (reg->vals.size && + (util_dynarray_top(®->vals, struct nv_value *))->insn->bb == bb) + *(util_dynarray_top_ptr(®->vals, struct nv_value *)) = val; + else + util_dynarray_append(®->vals, struct nv_value *, val); +} + +static INLINE boolean +bld_register_del_val(struct bld_register *reg, struct nv_value *val) +{ + unsigned i; + + for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i) + if (*bld_register_access(reg, i - 1) == val) + break; + if (!i) + return FALSE; + + if (i != reg->vals.size / sizeof(struct nv_value *)) + *bld_register_access(reg, i - 1) = util_dynarray_pop(®->vals, + struct nv_value *); + else + reg->vals.size -= sizeof(struct nv_value *); + + return TRUE; +} + +struct bld_context { + struct nvc0_translation_info *ti; + + struct nv_pc *pc; + struct nv_basic_block *b; + + struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING]; + int call_lvl; + + struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING]; + struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING]; + struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING]; + int cond_lvl; + struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING]; + struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING]; + int loop_lvl; + + ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */ + + struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */ + struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */ + struct bld_register 
pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */ + struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */ + + uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8]; + int hpos_index; + + struct nv_value *zero; + struct nv_value *frag_coord[4]; + + /* wipe on new BB */ + struct nv_value *saved_sysvals[4]; + struct nv_value *saved_addr[4][2]; + struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4]; + struct nv_value *saved_immd[BLD_MAX_IMMDS]; + uint num_immds; +}; + +static INLINE ubyte +bld_register_file(struct bld_context *bld, struct bld_register *reg) +{ + if (reg >= &bld->pvs[0][0] && + reg < &bld->ovs[0][0]) + return NV_FILE_PRED; + return NV_FILE_GPR; +} + +static INLINE struct nv_value * +bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c) +{ + regs[i * 4 + c].loop_use |= 1 << bld->loop_lvl; + return regs[i * 4 + c].current; +} + +static struct nv_value * +bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *); + +/* If a variable is defined in a loop without prior use, we don't need + * a phi in the loop header to account for backwards flow. + * + * However, if this variable is then also used outside the loop, we do + * need a phi after all. But we must not use this phi's def inside the + * loop, so we can eliminate the phi if it is unused later. 
+ */ +static INLINE void +bld_store(struct bld_context *bld, + struct bld_register *regs, int i, int c, struct nv_value *val) +{ + const uint16_t m = 1 << bld->loop_lvl; + struct bld_register *reg = ®s[i * 4 + c]; + + if (bld->loop_lvl && !(m & (reg->loop_def | reg->loop_use))) + bld_loop_phi(bld, reg, val); + + reg->current = val; + bld_register_add_val(reg, reg->current); + + reg->loop_def |= 1 << bld->loop_lvl; +} + +#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c) +#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v)) +#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c) +#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v)) +#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c) +#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v)) +#define STORE_OUTP(i, c, v) \ + do { \ + bld_store(bld, &bld->ovs[0][0], i, c, (v)); \ + bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \ + } while (0) + +static INLINE void +bld_clear_def_use(struct bld_register *regs, int n, int lvl) +{ + int i; + const uint16_t mask = ~(1 << lvl); + + for (i = 0; i < n * 4; ++i) { + regs[i].loop_def &= mask; + regs[i].loop_use &= mask; + } +} + +static INLINE void +bld_warn_uninitialized(struct bld_context *bld, int kind, + struct bld_register *reg, struct nv_basic_block *b) +{ +#ifdef NOUVEAU_DEBUG + long i = (reg - &bld->tvs[0][0]) / 4; + long c = (reg - &bld->tvs[0][0]) & 3; + + if (c == 3) + c = -1; + debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n", + i, (int)('x' + c), kind ? 
"may be" : "is", b->id); +#endif +} + +static INLINE struct nv_value * +bld_def(struct nv_instruction *i, int c, struct nv_value *value) +{ + i->def[c] = value; + value->insn = i; + return value; +} + +static INLINE struct nv_value * +find_by_bb(struct bld_register *reg, struct nv_basic_block *b) +{ + int i; + + if (reg->current && reg->current->insn->bb == b) + return reg->current; + + for (i = 0; i < reg->vals.size / sizeof(struct nv_value *); ++i) + if ((*bld_register_access(reg, i))->insn->bb == b) + return *bld_register_access(reg, i); + return NULL; +} + +/* Fetch value from register that was defined in the specified BB, + * or search for first definitions in all of its predecessors. + */ +static void +fetch_by_bb(struct bld_register *reg, + struct nv_value **vals, int *n, + struct nv_basic_block *b) +{ + int i; + struct nv_value *val; + + assert(*n < 16); /* MAX_COND_NESTING */ + + val = find_by_bb(reg, b); + if (val) { + for (i = 0; i < *n; ++i) + if (vals[i] == val) + return; + vals[(*n)++] = val; + return; + } + for (i = 0; i < b->num_in; ++i) + if (!IS_WALL_EDGE(b->in_kind[i])) + fetch_by_bb(reg, vals, n, b->in[i]); +} + +static INLINE struct nv_value * +bld_load_imm_u32(struct bld_context *bld, uint32_t u); + +static INLINE struct nv_value * +bld_undef(struct bld_context *bld, ubyte file) +{ + struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF); + + return bld_def(nvi, 0, new_value(bld->pc, file, 4)); +} + +static struct nv_value * +bld_phi(struct bld_context *bld, struct nv_basic_block *b, + struct bld_register *reg) +{ + struct nv_basic_block *in; + struct nv_value *vals[16] = { NULL }; + struct nv_value *val; + struct nv_instruction *phi; + int i, j, n; + + do { + i = n = 0; + fetch_by_bb(reg, vals, &n, b); + + if (!n) { + bld_warn_uninitialized(bld, 0, reg, b); + return NULL; + } + + if (n == 1) { + if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb)) + break; + + bld_warn_uninitialized(bld, 1, reg, b); + + /* back-tracking to insert 
missing value of other path */ + in = b; + while (in->in[0]) { + if (in->num_in == 1) { + in = in->in[0]; + } else { + if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b)) + in = in->in[0]; + else + if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b)) + in = in->in[1]; + else + in = in->in[0]; + } + } + bld->pc->current_block = in; + + /* should make this a no-op */ + bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file)); + continue; + } + + for (i = 0; i < n; ++i) { + /* if value dominates b, continue to the redefinitions */ + if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb)) + continue; + + /* if value dominates any in-block, b should be the dom frontier */ + for (j = 0; j < b->num_in; ++j) + if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb)) + break; + /* otherwise, find the dominance frontier and put the phi there */ + if (j == b->num_in) { + in = nvc0_bblock_dom_frontier(vals[i]->insn->bb); + val = bld_phi(bld, in, reg); + bld_register_add_val(reg, val); + break; + } + } + } while(i < n); + + bld->pc->current_block = b; + + if (n == 1) + return vals[0]; + + phi = new_instruction(bld->pc, NV_OP_PHI); + + bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size)); + for (i = 0; i < n; ++i) + nv_reference(bld->pc, phi, i, vals[i]); + + return phi->def[0]; +} + +/* Insert a phi function in the loop header. + * For nested loops, we need to insert phi functions in all the outer + * loop headers if they don't have one yet. 
+ * + * @def: redefinition from inside loop, or NULL if to be replaced later + */ +static struct nv_value * +bld_loop_phi(struct bld_context *bld, struct bld_register *reg, + struct nv_value *def) +{ + struct nv_instruction *phi; + struct nv_basic_block *bb = bld->pc->current_block; + struct nv_value *val = NULL; + + if (bld->ti->require_stores) /* XXX: actually only for INDEXABLE_TEMP */ + return NULL; + + if (bld->loop_lvl > 1) { + --bld->loop_lvl; + if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl))) + val = bld_loop_phi(bld, reg, NULL); + ++bld->loop_lvl; + } + + if (!val) + val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */ + if (!val) { + bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0]; + val = bld_undef(bld, bld_register_file(bld, reg)); + } + + bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]; + + phi = new_instruction(bld->pc, NV_OP_PHI); + + bld_def(phi, 0, new_value_like(bld->pc, val)); + if (!def) + def = phi->def[0]; + + bld_register_add_val(reg, phi->def[0]); + + phi->target = (struct nv_basic_block *)reg; /* cheat */ + + nv_reference(bld->pc, phi, 0, val); + nv_reference(bld->pc, phi, 1, def); + + bld->pc->current_block = bb; + + return phi->def[0]; +} + +static INLINE struct nv_value * +bld_fetch_global(struct bld_context *bld, struct bld_register *reg) +{ + const uint16_t m = 1 << bld->loop_lvl; + const uint16_t use = reg->loop_use; + + reg->loop_use |= m; + + /* If neither used nor def'd inside the loop, build a phi in foresight, + * so we don't have to replace stuff later on, which requires tracking. 
+ */ + if (bld->loop_lvl && !((use | reg->loop_def) & m)) + return bld_loop_phi(bld, reg, NULL); + + return bld_phi(bld, bld->pc->current_block, reg); +} + +static INLINE struct nv_value * +bld_imm_u32(struct bld_context *bld, uint32_t u) +{ + int i; + unsigned n = bld->num_immds; + + for (i = 0; i < n; ++i) + if (bld->saved_immd[i]->reg.imm.u32 == u) + return bld->saved_immd[i]; + + assert(n < BLD_MAX_IMMDS); + bld->num_immds++; + + bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4); + bld->saved_immd[n]->reg.imm.u32 = u; + return bld->saved_immd[n]; +} + +static void +bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *, + struct nv_value *); + +/* Replace the source of the phi in the loop header by the last assignment, + * or eliminate the phi function if there is no assignment inside the loop. + * + * Redundancy situation 1 - (used) but (not redefined) value: + * %3 = phi %0, %3 = %3 is used + * %3 = phi %0, %4 = is new definition + * + * Redundancy situation 2 - (not used) but (redefined) value: + * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE + */ +static void +bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) +{ + struct nv_basic_block *save = bld->pc->current_block; + struct nv_instruction *phi, *next; + struct nv_value *val; + struct bld_register *reg; + int i, s, n; + + for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) { + next = phi->next; + + reg = (struct bld_register *)phi->target; + phi->target = NULL; + + /* start with s == 1, src[0] is from outside the loop */ + for (s = 1, n = 0; n < bb->num_in; ++n) { + if (bb->in_kind[n] != CFG_EDGE_BACK) + continue; + + assert(s < 4); + bld->pc->current_block = bb->in[n]; + val = bld_fetch_global(bld, reg); + + for (i = 0; i < 4; ++i) + if (phi->src[i] && phi->src[i]->value == val) + break; + if (i == 4) { + /* skip values we do not want to replace */ + for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s); + 
nv_reference(bld->pc, phi, s++, val); + } + } + bld->pc->current_block = save; + + if (phi->src[0]->value == phi->def[0] || + phi->src[0]->value == phi->src[1]->value) + s = 1; + else + if (phi->src[1]->value == phi->def[0]) + s = 0; + else + continue; + + if (s >= 0) { + /* eliminate the phi */ + bld_register_del_val(reg, phi->def[0]); + + ++bld->pc->pass_seq; + bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value); + + nvc0_insn_delete(phi); + } + } +} + +static INLINE struct nv_value * +bld_imm_f32(struct bld_context *bld, float f) +{ + return bld_imm_u32(bld, fui(f)); +} + +static struct nv_value * +bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0) +{ + struct nv_instruction *insn = new_instruction(bld->pc, opcode); + + nv_reference(bld->pc, insn, 0, src0); + + return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); +} + +static struct nv_value * +bld_insn_2(struct bld_context *bld, uint opcode, + struct nv_value *src0, struct nv_value *src1) +{ + struct nv_instruction *insn = new_instruction(bld->pc, opcode); + + nv_reference(bld->pc, insn, 0, src0); + nv_reference(bld->pc, insn, 1, src1); + + return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); +} + +static struct nv_value * +bld_insn_3(struct bld_context *bld, uint opcode, + struct nv_value *src0, struct nv_value *src1, + struct nv_value *src2) +{ + struct nv_instruction *insn = new_instruction(bld->pc, opcode); + + nv_reference(bld->pc, insn, 0, src0); + nv_reference(bld->pc, insn, 1, src1); + nv_reference(bld->pc, insn, 2, src2); + + return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); +} + +static INLINE void +bld_src_predicate(struct bld_context *bld, + struct nv_instruction *nvi, int s, struct nv_value *val) +{ + nvi->predicate = s; + nv_reference(bld->pc, nvi, s, val); +} + +static INLINE void +bld_src_pointer(struct bld_context *bld, + struct nv_instruction *nvi, int s, struct nv_value *val) +{ + nvi->indirect 
= s; + nv_reference(bld->pc, nvi, s, val); +} + +static void +bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst, + struct nv_value *val) +{ + struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST); + struct nv_value *loc; + + loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); + + loc->reg.address = ofst * 4; + + nv_reference(bld->pc, insn, 0, loc); + nv_reference(bld->pc, insn, 1, val); + if (ptr) + bld_src_pointer(bld, insn, 2, ptr); +} + +static struct nv_value * +bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst) +{ + struct nv_value *loc, *val; + + loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); + + loc->reg.address = ofst * 4; + + val = bld_insn_1(bld, NV_OP_LD, loc); + if (ptr) + bld_src_pointer(bld, val->insn, 1, ptr); + + return val; +} + +static struct nv_value * +bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e) +{ + struct nv_value *val; + + val = bld_insn_1(bld, NV_OP_LG2, x); + val = bld_insn_2(bld, NV_OP_MUL_F32, e, val); + + val = bld_insn_1(bld, NV_OP_PREEX2, val); + val = bld_insn_1(bld, NV_OP_EX2, val); + + return val; +} + +static INLINE struct nv_value * +bld_load_imm_f32(struct bld_context *bld, float f) +{ + if (f == 0.0f) + return bld->zero; + return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f)); +} + +static INLINE struct nv_value * +bld_load_imm_u32(struct bld_context *bld, uint32_t u) +{ + if (u == 0) + return bld->zero; + return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u)); +} + +static INLINE struct nv_value * +bld_setp(struct bld_context *bld, uint op, uint8_t cc, + struct nv_value *src0, struct nv_value *src1) +{ + struct nv_value *val = bld_insn_2(bld, op, src0, src1); + + val->reg.file = NV_FILE_PRED; + val->reg.size = 1; + val->insn->set_cond = cc & 0xf; + return val; +} + +static INLINE struct nv_value * +bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src) +{ + struct nv_value *val = 
bld_insn_1(bld, NV_OP_CVT, src); + val->insn->ext.cvt.d = dt; + val->insn->ext.cvt.s = st; + return val; +} + +static void +bld_kil(struct bld_context *bld, struct nv_value *src) +{ + struct nv_instruction *nvi; + + src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero); + + nvi = new_instruction(bld->pc, NV_OP_KIL); + nvi->fixed = 1; + + bld_src_predicate(bld, nvi, 0, src); +} + +static void +bld_flow(struct bld_context *bld, uint opcode, + struct nv_value *pred, uint8_t cc, struct nv_basic_block *target, + boolean reconverge) +{ + struct nv_instruction *nvi; + + if (reconverge) + new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1; + + nvi = new_instruction(bld->pc, opcode); + nvi->target = target; + nvi->terminator = 1; + if (pred) { + nvi->cc = cc; + bld_src_predicate(bld, nvi, 0, pred); + } +} + +static ubyte +translate_setcc(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_SLT: return NV_CC_LT; + case TGSI_OPCODE_SGE: return NV_CC_GE; + case TGSI_OPCODE_SEQ: return NV_CC_EQ; + case TGSI_OPCODE_SGT: return NV_CC_GT; + case TGSI_OPCODE_SLE: return NV_CC_LE; + case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U; + case TGSI_OPCODE_STR: return NV_CC_TR; + case TGSI_OPCODE_SFL: return NV_CC_FL; + + case TGSI_OPCODE_ISLT: return NV_CC_LT; + case TGSI_OPCODE_ISGE: return NV_CC_GE; + case TGSI_OPCODE_USEQ: return NV_CC_EQ; + case TGSI_OPCODE_USGE: return NV_CC_GE; + case TGSI_OPCODE_USLT: return NV_CC_LT; + case TGSI_OPCODE_USNE: return NV_CC_NE; + default: + assert(0); + return NV_CC_FL; + } +} + +static uint +translate_opcode(uint opcode) +{ + switch (opcode) { + case TGSI_OPCODE_ABS: return NV_OP_ABS_F32; + case TGSI_OPCODE_ADD: return NV_OP_ADD_F32; + case TGSI_OPCODE_SUB: return NV_OP_SUB_F32; + case TGSI_OPCODE_UADD: return NV_OP_ADD_B32; + case TGSI_OPCODE_AND: return NV_OP_AND; + case TGSI_OPCODE_EX2: return NV_OP_EX2; + case TGSI_OPCODE_CEIL: return NV_OP_CEIL; + case TGSI_OPCODE_FLR: return NV_OP_FLOOR; + case TGSI_OPCODE_TRUNC: return 
NV_OP_TRUNC; + case TGSI_OPCODE_COS: return NV_OP_COS; + case TGSI_OPCODE_SIN: return NV_OP_SIN; + case TGSI_OPCODE_DDX: return NV_OP_DFDX; + case TGSI_OPCODE_DDY: return NV_OP_DFDY; + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_F2U: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_U2F: return NV_OP_CVT; + case TGSI_OPCODE_INEG: return NV_OP_NEG_S32; + case TGSI_OPCODE_LG2: return NV_OP_LG2; + case TGSI_OPCODE_ISHR: return NV_OP_SAR; + case TGSI_OPCODE_USHR: return NV_OP_SHR; + case TGSI_OPCODE_MAD: return NV_OP_MAD_F32; + case TGSI_OPCODE_MAX: return NV_OP_MAX_F32; + case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32; + case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32; + case TGSI_OPCODE_MIN: return NV_OP_MIN_F32; + case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32; + case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32; + case TGSI_OPCODE_MUL: return NV_OP_MUL_F32; + case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32; + case TGSI_OPCODE_OR: return NV_OP_OR; + case TGSI_OPCODE_RCP: return NV_OP_RCP; + case TGSI_OPCODE_RSQ: return NV_OP_RSQ; + case TGSI_OPCODE_SAD: return NV_OP_SAD; + case TGSI_OPCODE_SHL: return NV_OP_SHL; + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: return NV_OP_FSET_F32; + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_ISGE: return NV_OP_SET_S32; + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: return NV_OP_SET_U32; + case TGSI_OPCODE_TEX: return NV_OP_TEX; + case TGSI_OPCODE_TXP: return NV_OP_TEX; + case TGSI_OPCODE_TXB: return NV_OP_TXB; + case TGSI_OPCODE_TXL: return NV_OP_TXL; + case TGSI_OPCODE_XOR: return NV_OP_XOR; + default: + return NV_OP_NOP; + } +} + +#if 0 +static ubyte +infer_src_type(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_XOR: + case TGSI_OPCODE_SAD: + case TGSI_OPCODE_U2F: + case TGSI_OPCODE_UADD: + case TGSI_OPCODE_UDIV: + case 
TGSI_OPCODE_UMOD: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + case TGSI_OPCODE_USHR: + return NV_TYPE_U32; + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_IDIV: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_INEG: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_ISLT: + return NV_TYPE_S32; + default: + return NV_TYPE_F32; + } +} + +static ubyte +infer_dst_type(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_F2U: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_XOR: + case TGSI_OPCODE_SAD: + case TGSI_OPCODE_UADD: + case TGSI_OPCODE_UDIV: + case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + case TGSI_OPCODE_USHR: + return NV_TYPE_U32; + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_IDIV: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_INEG: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_ISLT: + return NV_TYPE_S32; + default: + return NV_TYPE_F32; + } +} +#endif + +static void +emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, + unsigned chan, struct nv_value *res) +{ + const struct tgsi_full_dst_register *reg = &inst->Dst[0]; + struct nv_instruction *nvi; + struct nv_value *mem; + struct nv_value *ptr = NULL; + int idx; + + idx = reg->Register.Index; + assert(chan < 4); + + if (reg->Register.Indirect) + ptr = FETCH_ADDR(reg->Indirect.Index, + tgsi_util_get_src_register_swizzle(®->Indirect, 0)); + + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + res = bld_insn_1(bld, NV_OP_SAT, res); + break; + case TGSI_SAT_MINUS_PLUS_ONE: + res = 
bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f)); + res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f)); + break; + } + + switch (reg->Register.File) { + case TGSI_FILE_OUTPUT: + if (!res->insn) + res = bld_insn_1(bld, NV_OP_MOV, res); + + if (bld->pc->is_fragprog) { + assert(!ptr); + STORE_OUTP(idx, chan, res); + } else { + nvi = new_instruction(bld->pc, NV_OP_EXPORT); + mem = new_value(bld->pc, bld->ti->output_file, res->reg.size); + nv_reference(bld->pc, nvi, 0, mem); + nv_reference(bld->pc, nvi, 1, res); + if (!ptr) + mem->reg.address = bld->ti->output_loc[idx][chan]; + else + mem->reg.address = 0x80 + idx * 16 + chan * 4; + nvi->fixed = 1; + } + break; + case TGSI_FILE_TEMPORARY: + assert(idx < BLD_MAX_TEMPS); + if (!res->insn || res->insn->bb != bld->pc->current_block) + res = bld_insn_1(bld, NV_OP_MOV, res); + + assert(res->reg.file == NV_FILE_GPR); + + if (bld->ti->require_stores) + bld_lmem_store(bld, ptr, idx * 4 + chan, res); + else + STORE_TEMP(idx, chan, res); + break; + case TGSI_FILE_ADDRESS: + assert(idx < BLD_MAX_ADDRS); + STORE_ADDR(idx, chan, res); + break; + } +} + +static INLINE uint32_t +bld_is_output_written(struct bld_context *bld, int i, int c) +{ + if (c < 0) + return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32)); + return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32)); +} + +static void +bld_append_vp_ucp(struct bld_context *bld) +{ + struct nv_value *res[6]; + struct nv_value *ucp, *vtx, *out; + struct nv_instruction *insn; + int i, c; + + assert(bld->ti->prog->vp.num_ucps <= 6); + + for (c = 0; c < 4; ++c) { + vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]); + + for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { + ucp = new_value(bld->pc, NV_FILE_MEM_C(15), 4); + ucp->reg.address = i * 16 + c * 4; + + if (c == 0) + res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp); + else + res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]); + } + } + + for (i = 0; i < 
bld->ti->prog->vp.num_ucps; ++i) { + (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4; + (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; + nv_reference(bld->pc, insn, 0, out); + nv_reference(bld->pc, insn, 1, res[i]); + } +} + +static void +bld_export_fp_outputs(struct bld_context *bld) +{ + struct nv_value *vals[4]; + struct nv_instruction *nvi; + int i, c, n; + + for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) { + if (!bld_is_output_written(bld, i, -1)) + continue; + for (n = 0, c = 0; c < 4; ++c) { + if (!bld_is_output_written(bld, i, c)) + continue; + vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]); + assert(vals[n]); + vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]); + vals[n++]->reg.id = bld->ti->output_loc[i][c]; + } + assert(n); + + (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; + for (c = 0; c < n; ++c) + nv_reference(bld->pc, nvi, c, vals[c]); + } +} + +static void +bld_new_block(struct bld_context *bld, struct nv_basic_block *b) +{ + int i, c; + + bld->pc->current_block = b; + + for (i = 0; i < 4; ++i) + bld->saved_addr[i][0] = NULL; + for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) + for (c = 0; c < 4; ++c) + bld->saved_inputs[i][c] = NULL; + + bld->out_kind = CFG_EDGE_FORWARD; +} + +static struct nv_value * +bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val) +{ + unsigned cent = mode & NVC0_INTERP_CENTROID; + + mode &= ~NVC0_INTERP_CENTROID; + + if (val->reg.address == 0x3fc) { + /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */ + val = bld_insn_1(bld, NV_OP_LINTERP, val); + val->insn->flat = 1; + val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31)); + val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f)); + return val; + } else + if (mode == NVC0_INTERP_PERSPECTIVE) { + val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]); + } else { + val = bld_insn_1(bld, NV_OP_LINTERP, val); + } + + val->insn->flat = mode == NVC0_INTERP_FLAT ? 1 : 0; + val->insn->centroid = cent ? 
1 : 0; + return val; +} + +static struct nv_value * +emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, + const unsigned s, const unsigned chan) +{ + const struct tgsi_full_src_register *src = &insn->Src[s]; + struct nv_value *res = NULL; + struct nv_value *ptr = NULL; + int idx, ind_idx, dim_idx; + unsigned swz, ind_swz, sgn; + + idx = src->Register.Index; + swz = tgsi_util_get_full_src_register_swizzle(src, chan); + + if (src->Register.Indirect) { + ind_idx = src->Indirect.Index; + ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0); + + ptr = FETCH_ADDR(ind_idx, ind_swz); + } + + if (src->Register.Dimension) + dim_idx = src->Dimension.Index; + else + dim_idx = 0; + + switch (src->Register.File) { + case TGSI_FILE_CONSTANT: + assert(dim_idx < 14); + res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4); + res->reg.address = idx * 16 + swz * 4; + res = bld_insn_1(bld, NV_OP_LD, res); + if (ptr) + bld_src_pointer(bld, res->insn, 1, ptr); + break; + case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */ + assert(idx < bld->ti->immd32_nr); + res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]); + break; + case TGSI_FILE_INPUT: + assert(!src->Register.Dimension); + if (!ptr) { + res = bld->saved_inputs[idx][swz]; + if (res) + break; + } + res = new_value(bld->pc, bld->ti->input_file, 4); + if (ptr) + res->reg.address = 0x80 + idx * 16 + swz * 4; + else + res->reg.address = bld->ti->input_loc[idx][swz]; + + if (bld->pc->is_fragprog) + res = bld_interp(bld, bld->ti->interp_mode[idx], res); + else + res = bld_insn_1(bld, NV_OP_VFETCH, res); + + if (ptr) + bld_src_pointer(bld, res->insn, res->insn->src[1] ? 
2 : 1, ptr); + else + bld->saved_inputs[idx][swz] = res; + break; + case TGSI_FILE_TEMPORARY: + if (bld->ti->require_stores) + res = bld_lmem_load(bld, ptr, idx * 4 + swz); + else + res = bld_fetch_global(bld, &bld->tvs[idx][swz]); + break; + case TGSI_FILE_ADDRESS: + res = bld_fetch_global(bld, &bld->avs[idx][swz]); + break; + case TGSI_FILE_PREDICATE: + res = bld_fetch_global(bld, &bld->pvs[idx][swz]); + break; + case TGSI_FILE_SYSTEM_VALUE: + assert(bld->ti->sysval_loc[idx] < 0xf00); /* >= would mean special reg */ + res = new_value(bld->pc, + bld->pc->is_fragprog ? NV_FILE_MEM_V : NV_FILE_MEM_A, 4); + res->reg.address = bld->ti->sysval_loc[idx]; + + if (res->reg.file == NV_FILE_MEM_A) + res = bld_insn_1(bld, NV_OP_VFETCH, res); + else + res = bld_interp(bld, NVC0_INTERP_FLAT, res); + + /* mesa doesn't do real integers yet :-(and in GL this should be S32) */ + res = bld_cvt(bld, NV_TYPE_F32, NV_TYPE_U32, res); + break; + default: + NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File); + abort(); + break; + } + if (!res) + return bld_undef(bld, NV_FILE_GPR); + + sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); + + switch (sgn) { + case TGSI_UTIL_SIGN_KEEP: + break; + case TGSI_UTIL_SIGN_CLEAR: + res = bld_insn_1(bld, NV_OP_ABS_F32, res); + break; + case TGSI_UTIL_SIGN_TOGGLE: + res = bld_insn_1(bld, NV_OP_NEG_F32, res); + break; + case TGSI_UTIL_SIGN_SET: + res = bld_insn_1(bld, NV_OP_ABS_F32, res); + res = bld_insn_1(bld, NV_OP_NEG_F32, res); + break; + default: + NOUVEAU_ERR("illegal/unhandled src reg sign mode\n"); + abort(); + break; + } + + return res; +} + +static void +bld_lit(struct bld_context *bld, struct nv_value *dst0[4], + const struct tgsi_full_instruction *insn) +{ + struct nv_value *val0 = NULL; + unsigned mask = insn->Dst[0].Register.WriteMask; + + if (mask & ((1 << 0) | (1 << 3))) + dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f); + + if (mask & (3 << 1)) { + val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 
0, 0), bld->zero); + if (mask & (1 << 1)) + dst0[1] = val0; + } + + if (mask & (1 << 2)) { + struct nv_value *val1, *val3, *src1, *src3, *pred; + struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f); + struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f); + + src1 = emit_fetch(bld, insn, 0, 1); + src3 = emit_fetch(bld, insn, 0, 3); + + pred = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, val0, bld->zero); + + val1 = bld_insn_2(bld, NV_OP_MAX_F32, src1, bld->zero); + val3 = bld_insn_2(bld, NV_OP_MAX_F32, src3, neg128); + val3 = bld_insn_2(bld, NV_OP_MIN_F32, val3, pos128); + val3 = bld_pow(bld, val1, val3); + + dst0[2] = bld_insn_1(bld, NV_OP_MOV, bld->zero); + bld_src_predicate(bld, dst0[2]->insn, 1, pred); + + dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]); + } +} + +static INLINE void +describe_texture_target(unsigned target, int *dim, + int *array, int *cube, int *shadow) +{ + *dim = *array = *cube = *shadow = 0; + + switch (target) { + case TGSI_TEXTURE_1D: + *dim = 1; + break; + case TGSI_TEXTURE_SHADOW1D: + *dim = *shadow = 1; + break; + case TGSI_TEXTURE_UNKNOWN: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + *dim = 2; + break; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + *dim = 2; + *shadow = 1; + break; + case TGSI_TEXTURE_3D: + *dim = 3; + break; + case TGSI_TEXTURE_CUBE: + *dim = 2; + *cube = 1; + break; + case TGSI_TEXTURE_1D_ARRAY: + *dim = *array = 1; + break; + case TGSI_TEXTURE_2D_ARRAY: + *dim = 2; + *array = 1; + break; + /* + case TGSI_TEXTURE_SHADOW1D_ARRAY: + *dim = *array = *shadow = 1; + break; + case TGSI_TEXTURE_SHADOW2D_ARRAY: + *dim = 2; + *array = *shadow = 1; + break; + case TGSI_TEXTURE_CUBE_ARRAY: + *dim = 2; + *cube = *array = 1; + break; + */ + default: + assert(0); + break; + } +} + +static struct nv_value * +bld_clone(struct bld_context *bld, struct nv_instruction *nvi) +{ + struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode); + struct nv_instruction *next, *prev; + int c; 
+ + next = dupi->next; + prev = dupi->prev; + + *dupi = *nvi; + + dupi->next = next; + dupi->prev = prev; + + for (c = 0; c < 5 && nvi->def[c]; ++c) + bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c])); + + for (c = 0; c < 6 && nvi->src[c]; ++c) { + dupi->src[c] = NULL; + nv_reference(bld->pc, dupi, c, nvi->src[c]->value); + } + + return dupi->def[0]; +} + +/* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */ +static void +load_proj_tex_coords(struct bld_context *bld, + struct nv_value *t[4], int dim, int shadow, + const struct tgsi_full_instruction *insn) +{ + int c; + unsigned mask = (1 << dim) - 1; + + if (shadow) + mask |= 4; /* depth comparison value */ + + t[3] = emit_fetch(bld, insn, 0, 3); + if (t[3]->insn->opcode == NV_OP_PINTERP) { + t[3] = bld_clone(bld, t[3]->insn); + t[3]->insn->opcode = NV_OP_LINTERP; + nv_reference(bld->pc, t[3]->insn, 1, NULL); + } + t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); + + for (c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) + continue; + t[c] = emit_fetch(bld, insn, 0, c); + + if (t[c]->insn->opcode != NV_OP_PINTERP) + continue; + mask &= ~(1 << c); + + t[c] = bld_clone(bld, t[c]->insn); + nv_reference(bld->pc, t[c]->insn, 1, t[3]); + } + if (mask == 0) + return; + + t[3] = emit_fetch(bld, insn, 0, 3); + t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); + + for (c = 0; c < 4; ++c) + if (mask & (1 << c)) + t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], t[3]); +} + +/* For a quad of threads / top left, top right, bottom left, bottom right + * pixels, do a different operation, and take src0 from a specific thread. 
+ */
+#define QOP_ADD  0
+#define QOP_SUBR 1
+#define QOP_SUB  2
+#define QOP_MOV1 3
+
+/* Pack four 2-bit QOP_* codes into one byte: a in bits 0-1, b in bits 2-3,
+ * c in bits 4-5 and d in bits 6-7.
+ */
+#define QOP(a, b, c, d) \
+   ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
+
+/* Emit an NV_OP_QUADOP on (src0, src1) and return its result value.
+ * qop is the packed per-pixel operation byte (see QOP()), lane is stored
+ * in the instruction's lanes field. Writing a predicate (wp) is not
+ * implemented and trips an assertion.
+ */
+static INLINE struct nv_value *
+bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
+           struct nv_value *src1, boolean wp)
+{
+   struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
+   val->insn->lanes = lane;
+   val->insn->quadop = qop;
+   if (wp) {
+      assert(!"quadop predicate write");
+   }
+   return val;
+}
+
+/* Build the texture instruction `opcode` for texture unit tic / sampler tsc.
+ *
+ * The arguments in arg[] are first routed through NV_OP_BIND instructions
+ * into freshly created GPR values, which then become the sources of the
+ * texture instruction. Four new GPR values are created as its results and
+ * returned through dst[]. Returns the texture instruction.
+ *
+ * NOTE: when `array` is set, arg[dim] is overwritten with its converted
+ * value (presumably float -> u32 layer index; confirm against bld_cvt).
+ *
+ * order of TGSI operands: x y z layer shadow lod/bias
+ * order of native operands: layer x y z | lod/bias shadow
+ */
+static struct nv_instruction *
+emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
+         struct nv_value *dst[4], struct nv_value *arg[4],
+         int dim, int array, int cube, int shadow)
+{
+   struct nv_value *src[4];
+   struct nv_instruction *nvi, *bnd;
+   int c;
+   int s = 0;
+   boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
+
+   /* both src[] and arg[] only have room for 4 operands */
+   assert(array + dim + cube + shadow + lodbias <= 4);
+
+   if (array)
+      arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);
+
+   /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */
+
+   bnd = new_instruction(bld->pc, NV_OP_BIND);
+   if (array) {
+      src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+      bld_def(bnd, s, src[s]);
+      nv_reference(bld->pc, bnd, s++, arg[dim + cube]);
+   }
+   for (c = 0; c < dim + cube; ++c, ++s) {
+      src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4));
+      nv_reference(bld->pc, bnd, s, arg[c]);
+   }
+
+   if (shadow || lodbias) {
+      bnd = new_instruction(bld->pc, NV_OP_BIND);
+
+      if (lodbias) {
+         src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+         bld_def(bnd, 0, src[s++]);
+         nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]);
+      }
+      if (shadow) {
+         src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+         bld_def(bnd, lodbias, src[s++]);
+         nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]);
+      }
+   }
+
+   nvi = new_instruction(bld->pc, opcode);
+   for (c = 0; c < 4; ++c)
+      dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4));
+   for (c = 0; c < s; ++c)
+      nv_reference(bld->pc, nvi, c, src[c]);
+
+   nvi->ext.tex.t = tic;
+   nvi->ext.tex.s = tsc;
+   nvi->tex_mask = 0xf;
+   nvi->tex_dim = dim;
+   nvi->tex_cube = cube;
+   nvi->tex_shadow = shadow;
+   nvi->tex_array = array;
+   nvi->tex_live = 0;
+
+   return nvi;
+}
+
+/* Translate a TGSI texture instruction (the opcode comes from
+ * translate_opcode()): fetch the coordinate operands from source 0 as
+ * required by the texture target, normalize cube coordinates by the
+ * reciprocal of their largest absolute component, fetch lod/bias from
+ * channel 3 for TXB/TXL, and emit the texture instruction.
+ * Source 1's register index is used as both texture and sampler index.
+ */
+static void
+bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
+        const struct tgsi_full_instruction *insn)
+{
+   struct nv_value *t[4], *s[3];
+   uint opcode = translate_opcode(insn->Instruction.Opcode);
+   int c, dim, array, cube, shadow;
+   const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
+   const int tic = insn->Src[1].Register.Index;
+   const int tsc = tic;
+
+   describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow);
+
+   /* t[] (and emit_tex()'s operand arrays) hold at most 4 entries; the
+    * highest index written below is dim + cube + array + shadow.
+    */
+   assert(dim + cube + array + shadow + lodbias <= 4);
+
+   if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
+      load_proj_tex_coords(bld, t, dim, shadow, insn);
+   else {
+      for (c = 0; c < dim + cube + array; ++c)
+         t[c] = emit_fetch(bld, insn, 0, c);
+      if (shadow)
+         t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2));
+   }
+
+   if (cube) {
+      for (c = 0; c < 3; ++c)
+         s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]);
+
+      s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]);
+      s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]);
+      s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);
+
+      for (c = 0; c < 3; ++c)
+         t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]);
+   }
+
+   if (lodbias)
+      t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3);
+
+   emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow);
+}
+
+static INLINE struct nv_value *
+bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
+        int n)
+{
+   struct nv_value *dotp, *src0, *src1;
+   int c;
+
+   src0 = emit_fetch(bld, insn, 0, 0);
+   src1 = emit_fetch(bld, insn, 1, 0);
+   dotp = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
+
+   for (c = 1; c < n; ++c) {
+      src0 = emit_fetch(bld, insn, 0, c);
+      src1 =
emit_fetch(bld, insn, 1, c); + dotp = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dotp); + } + return dotp; +} + +#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \ + for (chan = 0; chan < 4; ++chan) \ + if ((inst)->Dst[0].Register.WriteMask & (1 << chan)) + +static void +bld_instruction(struct bld_context *bld, + const struct tgsi_full_instruction *insn) +{ + struct nv_value *src0; + struct nv_value *src1; + struct nv_value *src2; + struct nv_value *dst0[4] = { NULL }; + struct nv_value *temp; + int c; + uint opcode = translate_opcode(insn->Instruction.Opcode); + uint8_t mask = insn->Dst[0].Register.WriteMask; + +#ifdef NOUVEAU_DEBUG + debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); +#endif + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MUL: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + dst0[c] = bld_insn_2(bld, opcode, src0, src1); + } + break; + case TGSI_OPCODE_ARL: + src1 = bld_imm_u32(bld, 4); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src0 = bld_insn_1(bld, NV_OP_FLOOR, src0); + src0->insn->ext.cvt.d = NV_TYPE_S32; + src0->insn->ext.cvt.s = NV_TYPE_F32; + dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1); + } + break; + case TGSI_OPCODE_CMP: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + src2 = emit_fetch(bld, insn, 2, c); + dst0[c] = bld_insn_3(bld, NV_OP_SLCT_F32, src1, src2, src0); + dst0[c]->insn->set_cond = NV_CC_LT; + } + break; + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_PRESIN, src0); + if (insn->Dst[0].Register.WriteMask & 7) + temp = bld_insn_1(bld, opcode, temp); + for (c = 0; c < 3; ++c) + if (insn->Dst[0].Register.WriteMask & (1 << c)) + dst0[c] = temp; + if 
(!(insn->Dst[0].Register.WriteMask & (1 << 3))) + break; + src0 = emit_fetch(bld, insn, 0, 3); + temp = bld_insn_1(bld, NV_OP_PRESIN, src0); + dst0[3] = bld_insn_1(bld, opcode, temp); + break; + case TGSI_OPCODE_DP2: + temp = bld_dot(bld, insn, 2); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_DP3: + temp = bld_dot(bld, insn, 3); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_DP4: + temp = bld_dot(bld, insn, 4); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_DPH: + src0 = bld_dot(bld, insn, 3); + src1 = emit_fetch(bld, insn, 1, 3); + temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_DST: + if (insn->Dst[0].Register.WriteMask & 1) + dst0[0] = bld_imm_f32(bld, 1.0f); + if (insn->Dst[0].Register.WriteMask & 2) { + src0 = emit_fetch(bld, insn, 0, 1); + src1 = emit_fetch(bld, insn, 1, 1); + dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); + } + if (insn->Dst[0].Register.WriteMask & 4) + dst0[2] = emit_fetch(bld, insn, 0, 2); + if (insn->Dst[0].Register.WriteMask & 8) + dst0[3] = emit_fetch(bld, insn, 1, 3); + break; + case TGSI_OPCODE_EXP: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_FLOOR, src0); + + if (insn->Dst[0].Register.WriteMask & 2) + dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp); + if (insn->Dst[0].Register.WriteMask & 1) { + temp = bld_insn_1(bld, NV_OP_PREEX2, temp); + dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp); + } + if (insn->Dst[0].Register.WriteMask & 4) { + temp = bld_insn_1(bld, NV_OP_PREEX2, src0); + dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp); + } + if (insn->Dst[0].Register.WriteMask & 8) + dst0[3] = bld_imm_f32(bld, 1.0f); + break; + case TGSI_OPCODE_EX2: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_PREEX2, src0); + temp = bld_insn_1(bld, NV_OP_EX2, temp); + 
FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_FRC: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0); + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]); + } + break; + case TGSI_OPCODE_KIL: + for (c = 0; c < 4; ++c) + bld_kil(bld, emit_fetch(bld, insn, 0, c)); + break; + case TGSI_OPCODE_KILP: + (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1; + break; + case TGSI_OPCODE_IF: + { + struct nv_basic_block *b = new_basic_block(bld->pc); + struct nv_value *pred = emit_fetch(bld, insn, 0, 0); + + assert(bld->cond_lvl < BLD_MAX_COND_NESTING); + + nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD); + + bld->join_bb[bld->cond_lvl] = bld->pc->current_block; + bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; + + if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) { + pred = bld_clone(bld, pred->insn); + pred->reg.size = 1; + pred->reg.file = NV_FILE_PRED; + if (pred->insn->opcode == NV_OP_FSET_F32) + pred->insn->opcode = NV_OP_SET_F32; + } else { + pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U, + pred, bld->zero); + } + assert(!mask); + + bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0)); + + ++bld->cond_lvl; + bld_new_block(bld, b); + } + break; + case TGSI_OPCODE_ELSE: + { + struct nv_basic_block *b = new_basic_block(bld->pc); + + --bld->cond_lvl; + nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); + + bld->cond_bb[bld->cond_lvl]->exit->target = b; + bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; + + new_instruction(bld->pc, NV_OP_BRA)->terminator = 1; + + ++bld->cond_lvl; + bld_new_block(bld, b); + } + break; + case TGSI_OPCODE_ENDIF: + { + struct nv_basic_block *b = new_basic_block(bld->pc); + + if (bld->pc->current_block->exit && + !bld->pc->current_block->exit->terminator) + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, b, FALSE); + + --bld->cond_lvl; + 
nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind); + nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); + + bld->cond_bb[bld->cond_lvl]->exit->target = b; + + bld_new_block(bld, b); + + if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) { + bld->join_bb[bld->cond_lvl]->exit->prev->target = b; + new_instruction(bld->pc, NV_OP_JOIN)->join = 1; + } + } + break; + case TGSI_OPCODE_BGNLOOP: + { + struct nv_basic_block *bl = new_basic_block(bld->pc); + struct nv_basic_block *bb = new_basic_block(bld->pc); + + assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING); + + bld->loop_bb[bld->loop_lvl] = bl; + bld->brkt_bb[bld->loop_lvl] = bb; + + nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER); + + bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]); + + if (bld->loop_lvl == bld->pc->loop_nesting_bound) + bld->pc->loop_nesting_bound++; + + bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl); + bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl); + bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl); + } + break; + case TGSI_OPCODE_BRK: + { + struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; + + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); + + if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); + + bld->out_kind = CFG_EDGE_FAKE; + } + break; + case TGSI_OPCODE_CONT: + { + struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; + + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); + + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + + if ((bb = bld->join_bb[bld->cond_lvl - 1])) { + bld->join_bb[bld->cond_lvl - 1] = NULL; + nvc0_insn_delete(bb->exit->prev); + } + bld->out_kind = CFG_EDGE_FAKE; + } + break; + case TGSI_OPCODE_ENDLOOP: + { + struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; + + if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT 
*/ + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); + + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + } + + bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ + + bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]); + } + break; + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_TRUNC: + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + dst0[c] = bld_insn_1(bld, opcode, src0); + } + break; + case TGSI_OPCODE_LIT: + bld_lit(bld, dst0, insn); + break; + case TGSI_OPCODE_LRP: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + src2 = emit_fetch(bld, insn, 2, c); + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); + dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2); + } + break; + case TGSI_OPCODE_MOV: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = emit_fetch(bld, insn, 0, c); + break; + case TGSI_OPCODE_MAD: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + src2 = emit_fetch(bld, insn, 2, c); + dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2); + } + break; + case TGSI_OPCODE_POW: + src0 = emit_fetch(bld, insn, 0, 0); + src1 = emit_fetch(bld, insn, 1, 0); + temp = bld_pow(bld, src0, src1); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_LOG: + src0 = emit_fetch(bld, insn, 0, 0); + src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0); + temp = bld_insn_1(bld, NV_OP_LG2, src0); + dst0[2] = temp; + if (insn->Dst[0].Register.WriteMask & 3) { + temp = bld_insn_1(bld, NV_OP_FLOOR, temp); + dst0[0] = temp; + } + if (insn->Dst[0].Register.WriteMask & 2) { + temp = bld_insn_1(bld, NV_OP_PREEX2, temp); + temp = bld_insn_1(bld, NV_OP_EX2, temp); + temp = bld_insn_1(bld, NV_OP_RCP, temp); + dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, 
src0, temp); + } + if (insn->Dst[0].Register.WriteMask & 8) + dst0[3] = bld_imm_f32(bld, 1.0f); + break; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_LG2: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, opcode, src0); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_RSQ: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_ABS_F32, src0); + temp = bld_insn_1(bld, NV_OP_RSQ, temp); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + dst0[c] = bld_insn_2(bld, opcode, src0, src1); + dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode); + } + break; + case TGSI_OPCODE_SCS: + if (insn->Dst[0].Register.WriteMask & 0x3) { + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_PRESIN, src0); + if (insn->Dst[0].Register.WriteMask & 0x1) + dst0[0] = bld_insn_1(bld, NV_OP_COS, temp); + if (insn->Dst[0].Register.WriteMask & 0x2) + dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp); + } + if (insn->Dst[0].Register.WriteMask & 0x4) + dst0[2] = bld_imm_f32(bld, 0.0f); + if (insn->Dst[0].Register.WriteMask & 0x8) + dst0[3] = bld_imm_f32(bld, 1.0f); + break; + case TGSI_OPCODE_SSG: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */ + src0 = emit_fetch(bld, insn, 0, c); + src1 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); + src2 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); + src1->insn->set_cond = NV_CC_GT; + src2->insn->set_cond = NV_CC_LT; + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); + } + break; + case TGSI_OPCODE_SUB: + 
FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1); + } + break; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + bld_tex(bld, dst0, insn); + break; + case TGSI_OPCODE_XPD: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + if (c == 3) { + dst0[3] = bld_imm_f32(bld, 1.0f); + break; + } + src0 = emit_fetch(bld, insn, 1, (c + 1) % 3); + src1 = emit_fetch(bld, insn, 0, (c + 2) % 3); + dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); + + src0 = emit_fetch(bld, insn, 0, (c + 1) % 3); + src1 = emit_fetch(bld, insn, 1, (c + 2) % 3); + dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]); + + dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG; + } + break; + case TGSI_OPCODE_RET: + (new_instruction(bld->pc, NV_OP_RET))->fixed = 1; + break; + case TGSI_OPCODE_END: + /* VP outputs are exported in-place as scalars, optimization later */ + if (bld->pc->is_fragprog) + bld_export_fp_outputs(bld); + if (bld->ti->append_ucp) + bld_append_vp_ucp(bld); + return; + default: + NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode); + abort(); + return; + } + + if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT && + !bld->pc->is_fragprog) { + struct nv_instruction *mi = NULL; + uint size; + + if (bld->ti->append_ucp) { + if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) { + bld->hpos_index = insn->Dst[0].Register.Index; + for (c = 0; c < 4; ++c) + if (mask & (1 << c)) + STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]); + } + } + + for (c = 0; c < 4; ++c) + if (mask & (1 << c)) + if ((dst0[c]->reg.file == NV_FILE_IMM) || + (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63)) + dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); + + c = 0; + if ((mask & 0x3) == 0x3) { + mask &= ~0x3; + size = 8; + mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn; + } + if ((mask & 0xc) == 0xc) { + mask 
&= ~0xc; + if (mi) { + size = 16; + nv_reference(bld->pc, mi, 2, dst0[2]); + nv_reference(bld->pc, mi, 3, dst0[3]); + } else { + c = 2; + size = 8; + mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn; + } + } else + if (mi && (mask & 0x4)) { + size = 12; + mask &= ~0x4; + nv_reference(bld->pc, mi, 2, dst0[2]); + } + + if (mi) { + struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT); + int s; + + nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4)); + nv_reference(bld->pc, ex, 1, mi->def[0]); + + for (s = 1; s < size / 4; ++s) { + bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4)); + nv_reference(bld->pc, ex, s + 1, mi->def[s]); + } + + ex->fixed = 1; + ex->src[0]->value->reg.size = size; + ex->src[0]->value->reg.address = + bld->ti->output_loc[insn->Dst[0].Register.Index][c]; + } + } + + for (c = 0; c < 4; ++c) + if (mask & (1 << c)) + emit_store(bld, insn, c, dst0[c]); +} + +static INLINE void +bld_free_registers(struct bld_register *base, int n) +{ + int i, c; + + for (i = 0; i < n; ++i) + for (c = 0; c < 4; ++c) + util_dynarray_fini(&base[i * 4 + c].vals); +} + +int +nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti) +{ + struct bld_context *bld = CALLOC_STRUCT(bld_context); + unsigned ip; + + pc->root[0] = pc->current_block = new_basic_block(pc); + + bld->pc = pc; + bld->ti = ti; + + pc->loop_nesting_bound = 1; + + bld->zero = new_value(pc, NV_FILE_GPR, 4); + bld->zero->reg.id = 63; + + if (pc->is_fragprog) { + struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4); + mem->reg.address = 0x7c; + + bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem); + bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]); + } + + for (ip = 0; ip < ti->num_insns; ++ip) + bld_instruction(bld, &ti->insns[ip]); + + bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS); + bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS); + bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS); + bld_free_registers(&bld->ovs[0][0], 
PIPE_MAX_SHADER_OUTPUTS); + + FREE(bld); + return 0; +} + +/* If a variable is assigned in a loop, replace all references to the value + * from outside the loop with a phi value. + */ +static void +bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b, + struct nv_value *old_val, + struct nv_value *new_val) +{ + struct nv_instruction *nvi; + + for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) { + int s; + for (s = 0; s < 6 && nvi->src[s]; ++s) + if (nvi->src[s]->value == old_val) + nv_reference(pc, nvi, s, new_val); + } + + b->pass_seq = pc->pass_seq; + + if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq) + bld_replace_value(pc, b->out[0], old_val, new_val); + + if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq) + bld_replace_value(pc, b->out[1], old_val, new_val); +} diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c new file mode 100644 index 0000000000..7bbfe057e5 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -0,0 +1,385 @@ + +#include "util/u_format.h" + +#include "nvc0_context.h" +#include "nvc0_transfer.h" + +#include "nv50/nv50_defs.xml.h" + +struct nvc0_transfer { + struct pipe_transfer base; + struct nvc0_m2mf_rect rect[2]; + uint32_t nblocksx; + uint16_t nblocksy; + uint16_t nlayers; +}; + +void +nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen, + const struct nvc0_m2mf_rect *dst, + const struct nvc0_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) +{ + struct nouveau_channel *chan = nouveau_screen(pscreen)->channel; + const int cpp = dst->cpp; + uint32_t src_ofst = src->base; + uint32_t dst_ofst = dst->base; + uint32_t height = nblocksy; + uint32_t sy = src->y; + uint32_t dy = dst->y; + uint32_t exec = (1 << 20); + + assert(dst->cpp == src->cpp); + + if (nouveau_bo_tile_layout(src->bo)) { + BEGIN_RING(chan, RING_MF(TILING_MODE_IN), 5); + OUT_RING (chan, src->tile_mode); + OUT_RING (chan, src->width * cpp); + OUT_RING (chan, src->height); + OUT_RING (chan, 
src->depth); + OUT_RING (chan, src->z); + } else { + src_ofst += src->y * src->pitch + src->x * cpp; + + BEGIN_RING(chan, RING_MF(PITCH_IN), 1); + OUT_RING (chan, src->width * cpp); + + exec |= NVC0_M2MF_EXEC_LINEAR_IN; + } + + if (nouveau_bo_tile_layout(dst->bo)) { + BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5); + OUT_RING (chan, dst->tile_mode); + OUT_RING (chan, dst->width * cpp); + OUT_RING (chan, dst->height); + OUT_RING (chan, dst->depth); + OUT_RING (chan, dst->z); + } else { + dst_ofst += dst->y * dst->pitch + dst->x * cpp; + + BEGIN_RING(chan, RING_MF(PITCH_OUT), 1); + OUT_RING (chan, dst->width * cpp); + + exec |= NVC0_M2MF_EXEC_LINEAR_OUT; + } + + while (height) { + int line_count = height > 2047 ? 2047 : height; + + MARK_RING (chan, 17, 4); + + BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); + OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD); + OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD); + + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); + OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR); + + if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) { + BEGIN_RING(chan, RING_MF(TILING_POSITION_IN_X), 2); + OUT_RING (chan, src->x * cpp); + OUT_RING (chan, sy); + } else { + src_ofst += line_count * src->pitch; + } + if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) { + BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2); + OUT_RING (chan, dst->x * cpp); + OUT_RING (chan, dy); + } else { + dst_ofst += line_count * dst->pitch; + } + + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); + OUT_RING (chan, nblocksx * cpp); + OUT_RING (chan, line_count); + BEGIN_RING(chan, RING_MF(EXEC), 1); + OUT_RING (chan, exec); + + height -= line_count; + sy += line_count; + dy += line_count; + } +} + +void +nvc0_m2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, void *data) +{ + struct nouveau_channel *chan = 
nv->screen->channel;
+   uint32_t *src = (uint32_t *)data;
+   unsigned count = (size + 3) / 4;
+
+   MARK_RING (chan, 8, 2);
+
+   BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+   OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR);
+   OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR);
+   BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+   OUT_RING  (chan, size);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_MF(EXEC), 1);
+   OUT_RING  (chan, 0x100111);
+
+   while (count) {
+      unsigned nr = AVAIL_RING(chan);
+
+      if (nr < 9) {
+         FIRE_RING(chan);
+         nouveau_bo_validate(chan, dst, NOUVEAU_BO_WR);
+         continue;
+      }
+      nr = MIN2(count, nr - 1);
+      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+
+      BEGIN_RING_NI(chan, RING_MF(DATA), nr);
+      OUT_RINGp (chan, src, nr);
+
+      src += nr;
+      count -= nr;
+   }
+}
+
+/* Copy `size` bytes from src + srcoff to dst + dstoff with the M2MF engine.
+ * The copy is issued in pieces of at most 1 << 17 bytes, each programmed as
+ * a single line with both sides flagged linear.
+ */
+void
+nvc0_m2mf_copy_linear(struct nouveau_context *nv,
+                      struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+                      struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+                      unsigned size)
+{
+   struct nouveau_channel *chan = nv->screen->channel;
+
+   while (size) {
+      unsigned bytes = MIN2(size, 1 << 17);
+
+      MARK_RING (chan, 11, 4);
+
+      BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+      OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
+      OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
+      BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
+      OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
+      OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
+      BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+      OUT_RING  (chan, bytes);
+      OUT_RING  (chan, 1);
+      BEGIN_RING(chan, RING_MF(EXEC), 1);
+      OUT_RING  (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
+                 NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT);
+
+      srcoff += bytes;
+      dstoff += bytes;
+      size -= bytes;
+   }
+}
+
+/**
+ * Upload a rectangle of CPU data into a tiled buffer by pushing the payload
+ * inline through the command FIFO.
+ *
+ * \param pscreen   screen whose channel receives the commands
+ * \param dst       destination rectangle; its bo must have a tiled layout
+ * \param data      source rows, tightly packed (nblocksx * dst->cpp bytes each)
+ * \param nblocksx  row length in blocks
+ * \param nblocksy  number of rows
+ *
+ * The upload is split into chunks of whole rows sized to the FIFO space that
+ * is currently available; the FIFO is fired whenever not even one row fits.
+ */
+static void
+nvc0_m2mf_push_rect(struct pipe_screen *pscreen,
+                    const struct nvc0_m2mf_rect *dst,
+                    const void *data,
+                    unsigned nblocksx, unsigned nblocksy)
+{
+   /* was left uninitialized; fetch the channel like nvc0_m2mf_transfer_rect */
+   struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
+   const uint8_t *src = (const uint8_t *)data;
+   const int cpp = dst->cpp;
+   const int line_len = nblocksx * cpp;
+   const int line_words = (line_len + 3) / 4;
+   /* words emitted per chunk besides the payload: 3 + 3 + 3 + 2 for the
+    * OFFSET_OUT, TILING_POSITION_OUT, LINE_LENGTH_IN and EXEC methods plus
+    * 1 for the DATA packet header (presumably one header word per packet)
+    */
+   const int header_words = 12;
+   int dy = dst->y;
+
+   assert(nouveau_bo_tile_layout(dst->bo));
+
+   if (!line_len)
+      return; /* nothing to upload; also avoids dividing by zero below */
+
+   /* a chunk never exceeds one packet, so a single row has to fit into one,
+    * otherwise the loop below could not make progress
+    */
+   assert(header_words + line_words <= NV04_PFIFO_MAX_PACKET_LEN);
+
+   BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
+   OUT_RING  (chan, dst->tile_mode);
+   OUT_RING  (chan, dst->width * cpp);
+   OUT_RING  (chan, dst->height);
+   OUT_RING  (chan, dst->depth);
+   OUT_RING  (chan, dst->z);
+
+   while (nblocksy) {
+      int line_count, words;
+      int size = MIN2(AVAIL_RING(chan), NV04_PFIFO_MAX_PACKET_LEN);
+
+      /* need room for the method words and at least one complete row */
+      if (size < (header_words + line_words)) {
+         FIRE_RING(chan);
+         continue;
+      }
+      /* as many whole rows as fit, but never more than are left */
+      line_count = ((size - header_words) * 4) / line_len;
+      line_count = MIN2(line_count, (int)nblocksy);
+      words = (line_count * line_len + 3) / 4;
+
+      BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+      OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
+      OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
+
+      BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
+      OUT_RING  (chan, dst->x * cpp);
+      OUT_RING  (chan, dy);
+      BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+      OUT_RING  (chan, line_len);
+      OUT_RING  (chan, line_count);
+      BEGIN_RING(chan, RING_MF(EXEC), 1);
+      OUT_RING  (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
+                 NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN);
+
+      BEGIN_RING_NI(chan, RING_MF(DATA), words);
+      OUT_RINGp (chan, src, words);
+
+      dy += line_count;
+      src += line_len * line_count;
+      nblocksy -= line_count;
+   }
+}
+
+struct pipe_transfer *
+nvc0_miptree_transfer_new(struct pipe_context *pctx,
+                          struct pipe_resource *res,
+                          unsigned level,
+                          unsigned usage,
+                          const struct pipe_box *box)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pctx);
+   struct pipe_screen *pscreen = pctx->screen;
+   struct nouveau_device *dev = nvc0->screen->base.device;
+   struct nvc0_miptree *mt = nvc0_miptree(res);
+   struct nvc0_miptree_level *lvl = &mt->level[level];
+   struct nvc0_transfer *tx;
+   uint32_t size;
+   uint32_t w, h, d, z, layer;
+   int ret;
+
+   tx = CALLOC_STRUCT(nvc0_transfer);
+   if (!tx)
+      return NULL;
+
+   if (mt->layout_3d) {
+      z = box->z;
+      d = u_minify(res->depth0, level);
+      layer =
0;
+   } else {
+      z = 0;
+      d = 1;
+      layer = box->z;
+   }
+   tx->nlayers = box->depth;
+
+   /* the transfer holds its own reference on the resource from here on */
+   pipe_resource_reference(&tx->base.resource, res);
+
+   tx->base.level = level;
+   tx->base.usage = usage;
+   tx->base.box = *box;
+
+   tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
+   tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+
+   tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
+   tx->base.layer_stride = tx->nblocksy * tx->base.stride;
+
+   w = u_minify(res->width0, level);
+   h = u_minify(res->height0, level);
+
+   tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format);
+
+   /* rect[0] describes the selected mip level inside the miptree's bo */
+   tx->rect[0].bo = mt->base.bo;
+   tx->rect[0].base = lvl->offset + layer * mt->layer_stride;
+   tx->rect[0].tile_mode = lvl->tile_mode;
+   tx->rect[0].x = util_format_get_nblocksx(res->format, box->x);
+   tx->rect[0].y = util_format_get_nblocksy(res->format, box->y);
+   tx->rect[0].z = z;
+   tx->rect[0].width = util_format_get_nblocksx(res->format, w);
+   tx->rect[0].height = util_format_get_nblocksy(res->format, h);
+   tx->rect[0].depth = d;
+   tx->rect[0].pitch = lvl->pitch;
+   tx->rect[0].domain = NOUVEAU_BO_VRAM;
+
+   size = tx->base.layer_stride;
+
+   /* rect[1] is a mappable GART staging buffer with room for all layers */
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+                        size * tx->nlayers, &tx->rect[1].bo);
+   if (ret) {
+      /* drop the reference taken above, otherwise the resource leaks */
+      pipe_resource_reference(&tx->base.resource, NULL);
+      FREE(tx);
+      return NULL;
+   }
+
+   tx->rect[1].width = tx->nblocksx;
+   tx->rect[1].height = tx->nblocksy;
+   tx->rect[1].depth = 1;
+   tx->rect[1].pitch = tx->base.stride;
+   tx->rect[1].domain = NOUVEAU_BO_GART;
+
+   if (usage & PIPE_TRANSFER_READ) {
+      unsigned base = tx->rect[0].base;
+      unsigned i;
+      /* copy each layer / depth slice of the box into the staging buffer */
+      for (i = 0; i < tx->nlayers; ++i) {
+         nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0],
+                                 tx->nblocksx, tx->nblocksy);
+         if (mt->layout_3d)
+            tx->rect[0].z++;
+         else
+            tx->rect[0].base += mt->layer_stride;
+         tx->rect[1].base += size;
+      }
+      /* rewind both rects to the first layer for the later write-back */
+      tx->rect[0].z = z;
+      tx->rect[0].base = base;
+      tx->rect[1].base = 0;
+   }
+
+   return &tx->base;
+}
+
+void
+nvc0_miptree_transfer_del(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct pipe_screen *pscreen = pctx->screen; + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; + struct nvc0_miptree *mt = nvc0_miptree(tx->base.resource); + unsigned i; + + if (tx->base.usage & PIPE_TRANSFER_WRITE) { + for (i = 0; i < tx->nlayers; ++i) { + nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += tx->nblocksy * tx->base.stride; + } + } + + nouveau_bo_ref(NULL, &tx->rect[1].bo); + pipe_resource_reference(&transfer->resource, NULL); + + FREE(tx); +} + +void * +nvc0_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; + int ret; + unsigned flags = 0; + + if (tx->rect[1].bo->map) + return tx->rect[1].bo->map; + + if (transfer->usage & PIPE_TRANSFER_READ) + flags = NOUVEAU_BO_RD; + if (transfer->usage & PIPE_TRANSFER_WRITE) + flags |= NOUVEAU_BO_WR; + + ret = nouveau_bo_map(tx->rect[1].bo, flags); + if (ret) + return NULL; + return tx->rect[1].bo->map; +} + +void +nvc0_miptree_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; + + nouveau_bo_unmap(tx->rect[1].bo); +} + diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h new file mode 100644 index 0000000000..803ee3463e --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_transfer.h @@ -0,0 +1,44 @@ + +#ifndef __NVC0_TRANSFER_H__ +#define __NVC0_TRANSFER_H__ + +#include "pipe/p_state.h" + +struct pipe_transfer * +nvc0_miptree_transfer_new(struct pipe_context *pcontext, + struct pipe_resource *pt, + unsigned level, + unsigned usage, + const struct pipe_box *box); +void +nvc0_miptree_transfer_del(struct pipe_context *pcontext, + struct 
/* One side (source or destination) of an M2MF rectangle copy. */
struct nvc0_m2mf_rect {
   struct nouveau_bo *bo;  /* buffer object that holds the data */
   uint32_t base;          /* offset into bo, passed as the relocation delta */
   unsigned domain;        /* NOUVEAU_BO_VRAM or NOUVEAU_BO_GART placement */
   uint32_t pitch;         /* row pitch; the transfer code sets it from the
                            * level pitch or the staging stride */
   uint32_t width;         /* surface width, in format blocks */
   uint32_t x;             /* x position of the rectangle, in format blocks */
   uint32_t height;        /* surface height, in format blocks */
   uint32_t y;             /* y position of the rectangle, in format blocks */
   uint16_t depth;         /* number of slices of the surface */
   uint16_t z;             /* slice the rectangle lies in */
   uint16_t tile_mode;     /* tile mode of the miptree level; presumably only
                            * meaningful for tiled surfaces - TODO confirm */
   uint16_t cpp;           /* size of one format block in bytes */
};
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" + +#include "nvc0_3d.xml.h" + +void +nvc0_vertex_state_delete(struct pipe_context *pipe, + void *hwcso) +{ + struct nvc0_vertex_stateobj *so = hwcso; + + if (so->translate) + so->translate->release(so->translate); + FREE(hwcso); +} + +void * +nvc0_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvc0_vertex_stateobj *so; + struct translate_key transkey; + unsigned i; + + so = MALLOC(sizeof(*so) + + num_elements * sizeof(struct nvc0_vertex_element)); + if (!so) + return NULL; + so->num_elements = num_elements; + so->instance_elts = 0; + so->instance_bufs = 0; + so->need_conversion = FALSE; + + transkey.nr_elements = 0; + transkey.output_stride = 0; + + for (i = 0; i < num_elements; ++i) { + const struct pipe_vertex_element *ve = &elements[i]; + const unsigned vbi = ve->vertex_buffer_index; + enum pipe_format fmt = ve->src_format; + + so->element[i].pipe = elements[i]; + so->element[i].state = nvc0_format_table[fmt].vtx; + + if (!so->element[i].state) { + switch (util_format_get_nr_components(fmt)) { + case 1: fmt = PIPE_FORMAT_R32_FLOAT; break; + case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break; + case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break; + case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break; + default: + assert(0); + return NULL; + } + so->element[i].state = nvc0_format_table[fmt].vtx; + so->need_conversion = TRUE; + } + so->element[i].state |= i; + + if (1) { + unsigned j = transkey.nr_elements++; + + transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL; + transkey.element[j].input_format = ve->src_format; + transkey.element[j].input_buffer = vbi; + transkey.element[j].input_offset = ve->src_offset; + transkey.element[j].instance_divisor = ve->instance_divisor; + + 
transkey.element[j].output_format = fmt; + transkey.element[j].output_offset = transkey.output_stride; + transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3; + + if (unlikely(ve->instance_divisor)) { + so->instance_elts |= 1 << i; + so->instance_bufs |= 1 << vbi; + } + } + } + + so->translate = translate_create(&transkey); + so->vtx_size = transkey.output_stride / 4; + so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1); + + return so; +} + +#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST + +#define VTX_ATTR(a, c, t, s) \ + ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \ + (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \ + ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \ + ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT)) + +static void +nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb, + struct pipe_vertex_element *ve, unsigned attr) +{ + const void *data; + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nv04_resource *res = nv04_resource(vb->buffer); + float v[4]; + int i; + const unsigned nc = util_format_get_nr_components(ve->src_format); + + data = nouveau_resource_map_offset(&nvc0->base, res, vb->buffer_offset + + ve->src_offset, NOUVEAU_BO_RD); + + util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1); + + BEGIN_RING(chan, RING_3D(VTX_ATTR_DEFINE), nc + 1); + OUT_RING (chan, VTX_ATTR(attr, nc, FLOAT, 32)); + for (i = 0; i < nc; ++i) + OUT_RINGf(chan, v[i]); +} + +static INLINE void +nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, + uint32_t *base, uint32_t *size) +{ + if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { + /* TODO: use min and max instance divisor to get a proper range */ + *base = 0; + *size = nvc0->vtxbuf[vbi].buffer->width0; + } else { + assert(nvc0->vbo_max_index != ~0); + *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride; + *size = 
(nvc0->vbo_max_index - + nvc0->vbo_min_index + 1) * nvc0->vtxbuf[vbi].stride; + } +} + +static void +nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) +{ + struct pipe_vertex_buffer *vb; + struct nv04_resource *buf; + int i; + uint32_t base, size; + + nvc0->vbo_fifo = nvc0->vbo_user = 0; + + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + vb = &nvc0->vtxbuf[i]; + if (!vb->stride) + continue; + buf = nv04_resource(vb->buffer); + + /* NOTE: user buffers with temporary storage count as mapped by GPU */ + if (!nouveau_resource_mapped_by_gpu(vb->buffer)) { + if (nvc0->vbo_push_hint) { + nvc0->vbo_fifo = ~0; + continue; + } else { + if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) { + nvc0->vbo_user |= 1 << i; + assert(vb->stride > vb->buffer_offset); + nvc0_vbuf_range(nvc0, i, &base, &size); + nouveau_user_buffer_upload(buf, base, size); + } else { + nouveau_buffer_migrate(&nvc0->base, buf, NOUVEAU_BO_GART); + } + nvc0->base.vbo_dirty = TRUE; + } + } + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, buf, NOUVEAU_BO_RD); + nouveau_buffer_adjust_score(&nvc0->base, buf, 1); + } +} + +static void +nvc0_update_user_vbufs(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + uint32_t base, offset, size; + int i; + uint32_t written = 0; + + for (i = 0; i < nvc0->vertex->num_elements; ++i) { + struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; + const int b = ve->vertex_buffer_index; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; + struct nv04_resource *buf = nv04_resource(vb->buffer); + + if (!(nvc0->vbo_user & (1 << b))) + continue; + + if (!vb->stride) { + nvc0_emit_vtxattr(nvc0, vb, ve, i); + continue; + } + nvc0_vbuf_range(nvc0, b, &base, &size); + + if (!(written & (1 << b))) { + written |= 1 << b; + nouveau_user_buffer_upload(buf, base, size); + } + offset = vb->buffer_offset + ve->src_offset; + + MARK_RING (chan, 6, 4); + BEGIN_RING_1I(chan, 
RING_3D(VERTEX_ARRAY_SELECT), 5); + OUT_RING (chan, i); + OUT_RESRCh(chan, buf, base + size - 1, NOUVEAU_BO_RD); + OUT_RESRCl(chan, buf, base + size - 1, NOUVEAU_BO_RD); + OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD); + OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD); + } + nvc0->base.vbo_dirty = TRUE; +} + +static INLINE void +nvc0_release_user_vbufs(struct nvc0_context *nvc0) +{ + uint32_t vbo_user = nvc0->vbo_user; + + while (vbo_user) { + int i = ffs(vbo_user) - 1; + vbo_user &= ~(1 << i); + + nouveau_buffer_release_gpu_storage(nv04_resource(nvc0->vtxbuf[i].buffer)); + } +} + +void +nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_vertex_stateobj *vertex = nvc0->vertex; + struct pipe_vertex_buffer *vb; + struct nvc0_vertex_element *ve; + unsigned i; + + if (unlikely(vertex->need_conversion || NVC0_USING_EDGEFLAG(nvc0))) { + nvc0->vbo_fifo = ~0; + nvc0->vbo_user = 0; + } else { + nvc0_prevalidate_vbufs(nvc0); + } + + BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements); + for (i = 0; i < vertex->num_elements; ++i) { + ve = &vertex->element[i]; + vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; + + if (likely(vb->stride) || nvc0->vbo_fifo) { + OUT_RING(chan, ve->state); + } else { + OUT_RING(chan, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST); + nvc0->vbo_fifo &= ~(1 << i); + } + } + + for (i = 0; i < vertex->num_elements; ++i) { + struct nv04_resource *res; + unsigned size, offset; + + ve = &vertex->element[i]; + vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; + + if (unlikely(ve->pipe.instance_divisor)) { + if (!(nvc0->state.instance_elts & (1 << i))) { + IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); + } + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1); + OUT_RING (chan, ve->pipe.instance_divisor); + } else + if (unlikely(nvc0->state.instance_elts & (1 << i))) { + IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0); + } + + res = 
nv04_resource(vb->buffer); + + if (nvc0->vbo_fifo || unlikely(vb->stride == 0)) { + if (!nvc0->vbo_fifo) + nvc0_emit_vtxattr(nvc0, vb, &ve->pipe, i); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); + OUT_RING (chan, 0); + continue; + } + + size = vb->buffer->width0; + offset = ve->pipe.src_offset + vb->buffer_offset; + + MARK_RING (chan, 8, 4); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); + OUT_RING (chan, (1 << 12) | vb->stride); + BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); + OUT_RING (chan, i); + OUT_RESRCh(chan, res, size - 1, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, size - 1, NOUVEAU_BO_RD); + OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); + } + for (; i < nvc0->state.num_vtxelts; ++i) { + BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1); + OUT_RING (chan, NVC0_3D_VERTEX_ATTRIB_INACTIVE); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); + OUT_RING (chan, 0); + } + + nvc0->state.num_vtxelts = vertex->num_elements; + nvc0->state.instance_elts = vertex->instance_elts; +} + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } +} + +static void +nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) +{ + struct nvc0_context *nvc0 = chan->user_private; + + 
nouveau_fence_update(&nvc0->screen->base, TRUE); + + nvc0_bufctx_emit_relocs(nvc0); +} + +static void +nvc0_draw_arrays(struct nvc0_context *nvc0, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + unsigned prim; + + chan->flush_notify = nvc0_draw_vbo_flush_notify; + chan->user_private = nvc0; + + prim = nvc0_prim_gl(mode); + + while (instance_count--) { + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, prim); + BEGIN_RING(chan, RING_3D(VERTEX_BUFFER_FIRST), 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0); + + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + + chan->flush_notify = nvc0_default_flush_notify; +} + +static void +nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 3) { + unsigned i; + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), count & 3); + for (i = 0; i < (count & 3); ++i) + OUT_RING(chan, *map++); + count &= ~3; + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4; + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr); + for (i = 0; i < nr; ++i) { + OUT_RING(chan, + (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]); + map += 4; + } + count -= nr * 4; + } +} + +static void +nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + count &= ~1; + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1); + OUT_RING (chan, *map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } +} + +static void +nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map, + 
unsigned start, unsigned count) +{ + map += start; + + while (count) { + const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr); + OUT_RINGp (chan, map, nr); + + map += nr; + count -= nr; + } +} + +static void +nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + count--; + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1); + OUT_RING (chan, *map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } +} + +static void +nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count, int32_t index_bias) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + void *data; + unsigned prim; + const unsigned index_size = nvc0->idxbuf.index_size; + + chan->flush_notify = nvc0_draw_vbo_flush_notify; + chan->user_private = nvc0; + + prim = nvc0_prim_gl(mode); + + if (index_bias != nvc0->state.index_bias) { + BEGIN_RING(chan, RING_3D(VB_ELEMENT_BASE), 1); + OUT_RING (chan, index_bias); + nvc0->state.index_bias = index_bias; + } + + if (nouveau_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) { + struct nv04_resource *res = nv04_resource(nvc0->idxbuf.buffer); + unsigned offset = nvc0->idxbuf.offset; + unsigned limit = nvc0->idxbuf.buffer->width0 - 1; + + nouveau_buffer_adjust_score(&nvc0->base, res, 1); + + while (instance_count--) { + MARK_RING (chan, 11, 4); + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, mode); + BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7); + OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); + OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD); + 
OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD); + OUT_RING (chan, index_size >> 1); + OUT_RING (chan, start); + OUT_RING (chan, count); + IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0); + + nvc0_resource_fence(res, NOUVEAU_BO_RD); + + mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + } else { + data = nouveau_resource_map_offset(&nvc0->base, + nv04_resource(nvc0->idxbuf.buffer), + nvc0->idxbuf.offset, NOUVEAU_BO_RD); + if (!data) + return; + + while (instance_count--) { + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, prim); + switch (index_size) { + case 1: + nvc0_draw_elements_inline_u08(chan, data, start, count); + break; + case 2: + nvc0_draw_elements_inline_u16(chan, data, start, count); + break; + case 4: + if (shorten) + nvc0_draw_elements_inline_u32_short(chan, data, start, count); + else + nvc0_draw_elements_inline_u32(chan, data, start, count); + break; + default: + assert(0); + return; + } + IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0); + + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + } + + chan->flush_notify = nvc0_default_flush_notify; +} + +void +nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + + /* For picking only a few vertices from a large user buffer, push is better, + * if index count is larger and we expect repeated vertices, suggest upload. 
+ */ + nvc0->vbo_push_hint = /* the 64 is heuristic */ + !(info->indexed && + ((info->max_index - info->min_index + 64) < info->count)); + + nvc0->vbo_min_index = info->min_index; + nvc0->vbo_max_index = info->max_index; + + if (nvc0->vbo_push_hint != !!nvc0->vbo_fifo) + nvc0->dirty |= NVC0_NEW_ARRAYS; + + if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS))) + nvc0_update_user_vbufs(nvc0); + + nvc0_state_validate(nvc0); + + if (nvc0->vbo_fifo) { + nvc0_push_vbo(nvc0, info); + return; + } + + if (nvc0->state.instance_base != info->start_instance) { + nvc0->state.instance_base = info->start_instance; + /* NOTE: this does not affect the shader input, should it ? */ + BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1); + OUT_RING (chan, info->start_instance); + } + + if (nvc0->base.vbo_dirty) { + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 1); + OUT_RING (chan, 0); + nvc0->base.vbo_dirty = FALSE; + } + + if (!info->indexed) { + nvc0_draw_arrays(nvc0, + info->mode, info->start, info->count, + info->instance_count); + } else { + boolean shorten = info->max_index <= 65535; + + assert(nvc0->idxbuf.buffer); + + if (info->primitive_restart != nvc0->state.prim_restart) { + if (info->primitive_restart) { + BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2); + OUT_RING (chan, 1); + OUT_RING (chan, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } else { + IMMED_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0); + } + nvc0->state.prim_restart = info->primitive_restart; + } else + if (info->primitive_restart) { + BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1); + OUT_RING (chan, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } + + nvc0_draw_elements(nvc0, shorten, + info->mode, info->start, info->count, + info->instance_count, info->index_bias); + } + + nvc0_release_user_vbufs(nvc0); +} diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h new file mode 100644 
index 0000000000..6519ce8e19 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -0,0 +1,120 @@ + +#ifndef __NVC0_WINSYS_H__ +#define __NVC0_WINSYS_H__ + +#include <stdint.h> +#include <unistd.h> +#include "pipe/p_defines.h" + +#include "nouveau/nouveau_bo.h" +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_grobj.h" +#include "nouveau/nouveau_device.h" +#include "nouveau/nouveau_resource.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_reloc.h" + +#include "nvc0_resource.h" /* OUT_RESRC */ + +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + +#define NVC0_SUBCH_3D 1 +#define NVC0_SUBCH_2D 2 +#define NVC0_SUBCH_MF 3 + +#define NVC0_MF_(n) NVC0_M2MF_##n + +#define RING_3D(n) ((NVC0_SUBCH_3D << 13) | (NVC0_3D_##n >> 2)) +#define RING_2D(n) ((NVC0_SUBCH_2D << 13) | (NVC0_2D_##n >> 2)) +#define RING_MF(n) ((NVC0_SUBCH_MF << 13) | (NVC0_MF_(n) >> 2)) + +#define RING_3D_(m) ((NVC0_SUBCH_3D << 13) | ((m) >> 2)) +#define RING_2D_(m) ((NVC0_SUBCH_2D << 13) | ((m) >> 2)) +#define RING_MF_(m) ((NVC0_SUBCH_MF << 13) | ((m) >> 2)) + +#define RING_GR(gr, m) (((gr)->subc << 13) | ((m) >> 2)) + +int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min); + +static inline uint32_t +nouveau_bo_tile_layout(struct nouveau_bo *bo) +{ + return bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK; +} + +static INLINE void +nouveau_bo_validate(struct nouveau_channel *chan, + struct nouveau_bo *bo, unsigned flags) +{ + nouveau_reloc_emit(chan, NULL, 0, NULL, bo, 0, 0, flags, 0, 0); +} + +/* incremental methods */ +static INLINE void +BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size) +{ + WAIT_RING(chan, size + 1); + OUT_RING (chan, (0x2 << 28) | (size << 16) | mthd); +} + +/* non-incremental */ +static INLINE void +BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size) +{ + WAIT_RING(chan, size + 1); + OUT_RING (chan, (0x6 << 28) | (size << 16) | mthd); +} + +/* 
increment-once */ +static INLINE void +BEGIN_RING_1I(struct nouveau_channel *chan, uint32_t mthd, unsigned size) +{ + WAIT_RING(chan, size + 1); + OUT_RING (chan, (0xa << 28) | (size << 16) | mthd); +} + +/* inline-data */ +static INLINE void +IMMED_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned data) +{ + WAIT_RING(chan, 1); + OUT_RING (chan, (0x8 << 28) | (data << 16) | mthd); +} + +static INLINE int +OUT_RESRCh(struct nouveau_channel *chan, struct nv04_resource *res, + unsigned delta, unsigned flags) +{ + return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags); +} + +static INLINE int +OUT_RESRCl(struct nouveau_channel *chan, struct nv04_resource *res, + unsigned delta, unsigned flags) +{ + if (flags & NOUVEAU_BO_WR) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); +} + +static INLINE void +BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned s) +{ + struct nouveau_subchannel *subc = &gr->channel->subc[s]; + + assert(s < 8); + if (subc->gr) { + assert(subc->gr->bound != NOUVEAU_GROBJ_BOUND_EXPLICIT); + subc->gr->bound = NOUVEAU_GROBJ_UNBOUND; + } + subc->gr = gr; + subc->gr->subc = s; + subc->gr->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT; + + BEGIN_RING(chan, RING_GR(gr, 0x0000), 1); + OUT_RING (chan, gr->grclass); +} + +#endif diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 46bb082388..a3b76ac61b 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -27,6 +27,7 @@ C_SOURCES = \ nvfx_vertprog.c LIBRARY_INCLUDES = \ + $(LIBDRM_CFLAGS) \ -I$(TOP)/src/gallium/drivers/nouveau/include include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nv04_2d.c b/src/gallium/drivers/nvfx/nv04_2d.c index e0e65e7a87..e2fadd33e1 100644 --- a/src/gallium/drivers/nvfx/nv04_2d.c +++ b/src/gallium/drivers/nvfx/nv04_2d.c @@ -34,11 +34,11 @@ #include <stdio.h> #include 
<stdint.h> #include <nouveau/nouveau_device.h> -#include <nouveau/nouveau_pushbuf.h> #include <nouveau/nouveau_channel.h> #include <nouveau/nouveau_bo.h> #include <nouveau/nouveau_notifier.h> #include <nouveau/nouveau_grobj.h> +#include <nouveau/nv04_pushbuf.h> #include "nv04_2d.h" #include "nouveau/nv_object.xml.h" diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c index 951fb202ed..b609891d31 100644 --- a/src/gallium/drivers/nvfx/nv30_fragtex.c +++ b/src/gallium/drivers/nvfx/nv30_fragtex.c @@ -71,6 +71,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit) struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit]; struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo; struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned txf; unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; unsigned use_rect; @@ -102,7 +103,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit) txf = sv->u.nv30.fmt[ps->compare + (use_rect ? 
2 : 0)]; MARK_RING(chan, 9, 2); - OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8)); + BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8); OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0); OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR, diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c index e8ab403f72..563183d9d0 100644 --- a/src/gallium/drivers/nvfx/nv40_fragtex.c +++ b/src/gallium/drivers/nvfx/nv40_fragtex.c @@ -76,6 +76,7 @@ void nv40_fragtex_set(struct nvfx_context *nvfx, int unit) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit]; struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo; @@ -87,7 +88,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit) txf = sv->u.nv40.fmt[ps->compare] | ps->fmt; MARK_RING(chan, 11, 2); - OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8)); + BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8); OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0); OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR, NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1); @@ -97,7 +98,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit) OUT_RING(chan, ps->filt | sv->filt); OUT_RING(chan, sv->npot_size); OUT_RING(chan, ps->bcol); - OUT_RING(chan, RING_3D(NV40_3D_TEX_SIZE1(unit), 1)); + BEGIN_RING(chan, eng3d, NV40_3D_TEX_SIZE1(unit), 1); OUT_RING(chan, sv->u.nv40.npot_size2); nvfx->hw_txf[unit] = txf; diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 95834d2327..2bcb93d93e 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -7,21 +7,22 @@ #include "nvfx_resource.h" static void -nvfx_flush(struct pipe_context *pipe, unsigned 
flags, +nvfx_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence) { struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; /* XXX: we need to actually be intelligent here */ - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(0x1fd8, 1)); + /* XXX This flag wasn't set by the state tracker anyway. */ + /*if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(chan, eng3d, 0x1fd8, 1); OUT_RING(chan, 2); - OUT_RING(chan, RING_3D(0x1fd8, 1)); + BEGIN_RING(chan, eng3d, 0x1fd8, 1); OUT_RING(chan, 1); - } + }*/ FIRE_RING(chan); if (fence) diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 6ef2a6945d..dad912b2ae 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -18,6 +18,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_resource.h" #include "nv30-40_3d.xml.h" #include "nvfx_state.h" @@ -339,30 +340,31 @@ extern void nvfx_init_vertprog_functions(struct nvfx_context *nvfx); /* nvfx_push.c */ extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); -/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! 
*/ -static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, const float* v, unsigned ncomp) +static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, + struct nouveau_grobj *eng3d, unsigned attrib, const float* v, + unsigned ncomp) { switch (ncomp) { case 4: - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_4F_X(attrib), 4)); + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_4F_X(attrib), 4); OUT_RING(chan, fui(v[0])); OUT_RING(chan, fui(v[1])); OUT_RING(chan, fui(v[2])); OUT_RING(chan, fui(v[3])); break; case 3: - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_3F_X(attrib), 3)); + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_3F_X(attrib), 3); OUT_RING(chan, fui(v[0])); OUT_RING(chan, fui(v[1])); OUT_RING(chan, fui(v[2])); break; case 2: - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_2F_X(attrib), 2)); + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_2F_X(attrib), 2); OUT_RING(chan, fui(v[0])); OUT_RING(chan, fui(v[1])); break; case 1: - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_1F(attrib), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_1F(attrib), 1); OUT_RING(chan, fui(v[0])); break; } diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 61f888a8ea..81f1ec485d 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -28,10 +28,10 @@ nvfx_render_flush(struct draw_stage *stage, unsigned flags) struct nvfx_render_stage *rs = nvfx_render_stage(stage); struct nvfx_context *nvfx = rs->nvfx; struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) { - assert(AVAIL_RING(chan) >= 2); - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1); OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP); rs->prim = NV30_3D_VERTEX_BEGIN_END_STOP; } @@ -46,6 +46,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, struct nvfx_screen 
*screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; boolean no_elements = nvfx->vertprog->draw_no_elements; unsigned num_attribs = nvfx->vertprog->draw_elements; @@ -63,7 +64,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, /* Switch primitive modes if necessary */ if (rs->prim != mode) { if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) { - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1); OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP); } @@ -74,23 +75,24 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, int i; for(i = 0; i < 32; ++i) { - OUT_RING(chan, RING_3D(0x1dac, 1)); + BEGIN_RING(chan, eng3d, 0x1dac, 1); OUT_RING(chan, 0); } } - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1); OUT_RING (chan, mode); rs->prim = mode; } - OUT_RING(chan, RING_3D_NI(NV30_3D_VERTEX_DATA, num_attribs * 4 * count)); if(no_elements) { + BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, 4); OUT_RING(chan, 0); OUT_RING(chan, 0); OUT_RING(chan, 0); OUT_RING(chan, 0); } else { + BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, num_attribs * 4 * count); for (unsigned i = 0; i < count; ++i) { struct vertex_header* v = prim->v[i]; diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 13e8beed47..dbd7c77346 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -1189,12 +1189,12 @@ out_err: static inline void nvfx_fp_memcpy(void* dst, const void* src, size_t len) { -#ifndef WORDS_BIGENDIAN +#ifndef PIPE_ARCH_BIG_ENDIAN memcpy(dst, src, len); #else size_t i; for(i = 0; i < len; i += 4) { - uint32_t v = (uint32_t*)((char*)src + i); + uint32_t v = *(uint32_t*)((char*)src + i); *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16); } #endif @@ -1233,6 +1233,7 @@ void 
nvfx_fragprog_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog; struct nvfx_vertex_program* vp; @@ -1499,17 +1500,17 @@ update: nvfx->hw_fragprog = fp; MARK_RING(chan, 8, 1); - OUT_RING(chan, RING_3D(NV30_3D_FP_ACTIVE_PROGRAM, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_FP_ACTIVE_PROGRAM, 1); OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, NV30_3D_FP_ACTIVE_PROGRAM_DMA0, NV30_3D_FP_ACTIVE_PROGRAM_DMA1); - OUT_RING(chan, RING_3D(NV30_3D_FP_CONTROL, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_FP_CONTROL, 1); OUT_RING(chan, fp->fp_control); if(!nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV30_3D_FP_REG_CONTROL, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_FP_REG_CONTROL, 1); OUT_RING(chan, (1<<16)|0x4); - OUT_RING(chan, RING_3D(NV30_3D_TEX_UNITS_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_TEX_UNITS_ENABLE, 1); OUT_RING(chan, fp->samplers); } } @@ -1518,8 +1519,7 @@ update: unsigned pointsprite_control = fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization; if(pointsprite_control != nvfx->hw_pointsprite_control) { - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_POINT_SPRITE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_POINT_SPRITE, 1); OUT_RING(chan, pointsprite_control); nvfx->hw_pointsprite_control = pointsprite_control; } diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c index fd0aff6a1a..1c4901df0e 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragtex.c +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c @@ -177,6 +177,7 @@ void nvfx_fragtex_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned samplers, unit; samplers = nvfx->dirty_samplers; @@ -197,9 +198,8 @@ nvfx_fragtex_validate(struct 
nvfx_context *nvfx) else nv40_fragtex_set(nvfx, unit); } else { - WAIT_RING(chan, 2); /* this is OK for nv40 too */ - OUT_RING(chan, RING_3D(NV30_3D_TEX_ENABLE(unit), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_TEX_ENABLE(unit), 1); OUT_RING(chan, 0); nvfx->hw_samplers &= ~(1 << unit); } diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index 8c043b867b..1be84b90a9 100644 --- a/src/gallium/drivers/nvfx/nvfx_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -138,11 +138,11 @@ nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_reso // on our current driver (and the driver too), format support does not depend on geometry, so don't bother computing it // TODO: may want to revisit this - if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_RENDER_TARGET, 0)) + if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_RENDER_TARGET)) mt->base.base.bind &=~ PIPE_BIND_RENDER_TARGET; - if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_SAMPLER_VIEW, 0)) + if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_SAMPLER_VIEW)) mt->base.base.bind &=~ PIPE_BIND_SAMPLER_VIEW; - if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_DEPTH_STENCIL, 0)) + if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_DEPTH_STENCIL)) mt->base.base.bind &=~ PIPE_BIND_DEPTH_STENCIL; return mt; diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c index ebf47e6ed3..6391741a2e 100644 --- a/src/gallium/drivers/nvfx/nvfx_push.c +++ b/src/gallium/drivers/nvfx/nvfx_push.c @@ -10,6 +10,7 @@ struct push_context { struct nouveau_channel* chan; + struct nouveau_grobj *eng3d; void *idxbuf; int32_t idxbias; @@ -27,9 +28,10 @@ static void emit_edgeflag(void *priv, boolean enabled) { struct push_context* ctx = priv; + struct nouveau_grobj *eng3d = 
ctx->eng3d; struct nouveau_channel *chan = ctx->chan; - OUT_RING(chan, RING_3D(NV30_3D_EDGEFLAG, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_EDGEFLAG, 1); OUT_RING(chan, enabled ? 1 : 0); } @@ -37,6 +39,7 @@ static void emit_vertices_lookup8(void *priv, unsigned start, unsigned count) { struct push_context *ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; uint8_t* elts = (uint8_t*)ctx->idxbuf + start; while(count) @@ -44,7 +47,7 @@ emit_vertices_lookup8(void *priv, unsigned start, unsigned count) unsigned push = MIN2(count, ctx->max_vertices_per_packet); unsigned length = push * ctx->vertex_length; - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur); ctx->chan->cur += length; @@ -57,6 +60,7 @@ static void emit_vertices_lookup16(void *priv, unsigned start, unsigned count) { struct push_context *ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; uint16_t* elts = (uint16_t*)ctx->idxbuf + start; while(count) @@ -64,7 +68,7 @@ emit_vertices_lookup16(void *priv, unsigned start, unsigned count) unsigned push = MIN2(count, ctx->max_vertices_per_packet); unsigned length = push * ctx->vertex_length; - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur); ctx->chan->cur += length; @@ -77,6 +81,7 @@ static void emit_vertices_lookup32(void *priv, unsigned start, unsigned count) { struct push_context *ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; uint32_t* elts = (uint32_t*)ctx->idxbuf + start; while(count) @@ -84,7 +89,7 @@ emit_vertices_lookup32(void *priv, unsigned start, unsigned count) unsigned push = MIN2(count, ctx->max_vertices_per_packet); unsigned length = push * ctx->vertex_length; - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + 
BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur); ctx->chan->cur += length; @@ -97,13 +102,14 @@ static void emit_vertices(void *priv, unsigned start, unsigned count) { struct push_context *ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; while(count) { unsigned push = MIN2(count, ctx->max_vertices_per_packet); unsigned length = push * ctx->vertex_length; - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur); ctx->chan->cur += length; @@ -116,10 +122,11 @@ static void emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg) { struct push_context* ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; struct nouveau_channel *chan = ctx->chan; unsigned nr = (vc & 0xff); if (nr) { - OUT_RING(chan, RING_3D(reg, 1)); + BEGIN_RING(chan, eng3d, reg, 1); OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -130,7 +137,7 @@ emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg) nr -= push; - OUT_RING(chan, RING_3D_NI(reg, push)); + BEGIN_RING_NI(chan, eng3d, reg, push); while (push--) { OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; @@ -154,12 +161,13 @@ static INLINE void emit_elt8(void* priv, unsigned start, unsigned vc) { struct push_context* ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; struct nouveau_channel *chan = ctx->chan; uint8_t *elts = (uint8_t *)ctx->idxbuf + start; int idxbias = ctx->idxbias; if (vc & 1) { - OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1); OUT_RING (chan, elts[0]); elts++; vc--; } @@ -168,7 +176,7 @@ emit_elt8(void* priv, unsigned start, unsigned vc) unsigned i; unsigned push = MIN2(vc, 2047 * 2); - OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1)); + BEGIN_RING_NI(chan, eng3d, 
NV30_3D_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias)); @@ -181,12 +189,13 @@ static INLINE void emit_elt16(void* priv, unsigned start, unsigned vc) { struct push_context* ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; struct nouveau_channel *chan = ctx->chan; uint16_t *elts = (uint16_t *)ctx->idxbuf + start; int idxbias = ctx->idxbias; if (vc & 1) { - OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1); OUT_RING (chan, elts[0]); elts++; vc--; } @@ -195,7 +204,7 @@ emit_elt16(void* priv, unsigned start, unsigned vc) unsigned i; unsigned push = MIN2(vc, 2047 * 2); - OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1)); + BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias)); @@ -208,6 +217,7 @@ static INLINE void emit_elt32(void* priv, unsigned start, unsigned vc) { struct push_context* ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; struct nouveau_channel *chan = ctx->chan; uint32_t *elts = (uint32_t *)ctx->idxbuf + start; int idxbias = ctx->idxbias; @@ -215,8 +225,7 @@ emit_elt32(void* priv, unsigned start, unsigned vc) while (vc) { unsigned push = MIN2(vc, 2047); - OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U32, push)); - assert(AVAIL_RING(chan) >= push); + BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U32, push); if(idxbias) { for(unsigned i = 0; i < push; ++i) @@ -235,6 +244,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct nvfx_context *nvfx = nvfx_context(pipe); struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct push_context ctx; struct util_split_prim s; unsigned instances_left = info->instance_count; @@ -251,6 +261,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info 
*info) + 4; /* potential edgeflag enable/disable */ ctx.chan = nvfx->screen->base.channel; + ctx.eng3d = nvfx->screen->eng3d; ctx.translate = nvfx->vtxelt->translate; ctx.idxbuf = NULL; ctx.vertex_length = nvfx->vtxelt->vertex_length; @@ -333,8 +344,9 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0); - WAIT_RING(chan, 5); - nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp); + nvfx_emit_vtx_attr(chan, eng3d, + nvfx->vtxelt->per_instance[i].base.idx, v, + nvfx->vtxelt->per_instance[i].base.ncomp); } /* per-instance loop */ @@ -374,15 +386,18 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) int i; for(i = 0; i < 32; ++i) { - OUT_RING(chan, RING_3D(0x1dac, 1)); + BEGIN_RING(chan, eng3d, + 0x1dac, 1); OUT_RING(chan, 0); } } - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + BEGIN_RING(chan, eng3d, + NV30_3D_VERTEX_BEGIN_END, 1); OUT_RING(chan, hw_mode); done = util_split_prim_next(&s, max_verts); - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + BEGIN_RING(chan, eng3d, + NV30_3D_VERTEX_BEGIN_END, 1); OUT_RING(chan, 0); if(done) @@ -406,8 +421,10 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) per_instance[i].step = 0; nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0); - WAIT_RING(chan, 5); - nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp); + nvfx_emit_vtx_attr(chan, eng3d, + nvfx->vtxelt->per_instance[i].base.idx, + v, + nvfx->vtxelt->per_instance[i].base.ncomp); } } } diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c index 3935ffd7f9..3cd6bf1e47 100644 --- a/src/gallium/drivers/nvfx/nvfx_query.c +++ b/src/gallium/drivers/nvfx/nvfx_query.c @@ -49,6 +49,7 @@ nvfx_query_begin(struct pipe_context 
*pipe, struct pipe_query *pq) struct nvfx_query *q = nvfx_query(pq); struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; uint64_t tmp; assert(!nvfx->query); @@ -72,10 +73,9 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) nouveau_notifier_reset(nvfx->screen->query, q->object->start); - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_RESET, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_RESET, 1); OUT_RING(chan, 1); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); OUT_RING(chan, 1); q->ready = FALSE; @@ -88,15 +88,15 @@ nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nvfx_context *nvfx = nvfx_context(pipe); struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct nvfx_query *q = nvfx_query(pq); assert(nvfx->query == pq); - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_GET, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_GET, 1); OUT_RING (chan, (0x01 << NV30_3D_QUERY_GET_UNK24__SHIFT) | ((q->object->start * 32) << NV30_3D_QUERY_GET_OFFSET__SHIFT)); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); OUT_RING(chan, 0); FIRE_RING(chan); diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c index c60a7bb8b9..42e77c5362 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.c +++ b/src/gallium/drivers/nvfx/nvfx_resource.c @@ -4,13 +4,6 @@ #include "nvfx_resource.h" #include "nouveau/nouveau_screen.h" -static unsigned int -nvfx_resource_is_referenced(struct pipe_context *pipe, - struct pipe_resource *pr, - unsigned level, int layer) -{ - return !!nouveau_reference_flags(nvfx_resource(pr)->bo); -} static struct pipe_resource * nvfx_resource_create(struct pipe_screen *screen, @@ -58,8 +51,6 @@ 
nvfx_resource_get_handle(struct pipe_screen *pscreen, void nvfx_init_resource_functions(struct pipe_context *pipe) { - pipe->is_resource_referenced = nvfx_resource_is_referenced; - pipe->create_surface = nvfx_miptree_surface_new; pipe->surface_destroy = nvfx_miptree_surface_del; } diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 92e1d33090..9e6b9d6ef8 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -202,7 +202,7 @@ nvfx_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned bind, unsigned geom_flags) + unsigned bind) { struct nvfx_screen *screen = nvfx_screen(pscreen); @@ -301,98 +301,100 @@ nvfx_screen_destroy(struct pipe_screen *pscreen) static void nv30_screen_init(struct nvfx_screen *screen) { struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; int i; /* TODO: perhaps we should do some of this on nv40 too? 
*/ for (i=1; i<8; i++) { - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1); OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_VERT(i), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_VERT(i), 1); OUT_RING(chan, 0); } - OUT_RING(chan, RING_3D(0x220, 1)); + BEGIN_RING(chan, eng3d, 0x220, 1); OUT_RING(chan, 1); - OUT_RING(chan, RING_3D(0x03b0, 1)); + BEGIN_RING(chan, eng3d, 0x03b0, 1); OUT_RING(chan, 0x00100000); - OUT_RING(chan, RING_3D(0x1454, 1)); + BEGIN_RING(chan, eng3d, 0x1454, 1); OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(0x1d80, 1)); + BEGIN_RING(chan, eng3d, 0x1d80, 1); OUT_RING(chan, 3); - OUT_RING(chan, RING_3D(0x1450, 1)); + BEGIN_RING(chan, eng3d, 0x1450, 1); OUT_RING(chan, 0x00030004); /* NEW */ - OUT_RING(chan, RING_3D(0x1e98, 1)); + BEGIN_RING(chan, eng3d, 0x1e98, 1); OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(0x17e0, 3)); + BEGIN_RING(chan, eng3d, 0x17e0, 3); OUT_RING(chan, fui(0.0)); OUT_RING(chan, fui(0.0)); OUT_RING(chan, fui(1.0)); - OUT_RING(chan, RING_3D(0x1f80, 16)); + BEGIN_RING(chan, eng3d, 0x1f80, 16); for (i=0; i<16; i++) { OUT_RING(chan, (i==8) ? 
0x0000ffff : 0); } - OUT_RING(chan, RING_3D(0x120, 3)); + BEGIN_RING(chan, eng3d, 0x120, 3); OUT_RING(chan, 0); OUT_RING(chan, 1); OUT_RING(chan, 2); - OUT_RING(chan, RING_3D(0x1d88, 1)); + BEGIN_RING(chan, eng3d, 0x1d88, 1); OUT_RING(chan, 0x00001200); - OUT_RING(chan, RING_3D(NV30_3D_RC_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_RC_ENABLE, 1); OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(NV30_3D_DEPTH_RANGE_NEAR, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_RANGE_NEAR, 2); OUT_RING(chan, fui(0.0)); OUT_RING(chan, fui(1.0)); - OUT_RING(chan, RING_3D(NV30_3D_MULTISAMPLE_CONTROL, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_MULTISAMPLE_CONTROL, 1); OUT_RING(chan, 0xffff0000); /* enables use of vp rather than fixed-function somehow */ - OUT_RING(chan, RING_3D(0x1e94, 1)); + BEGIN_RING(chan, eng3d, 0x1e94, 1); OUT_RING(chan, 0x13); } static void nv40_screen_init(struct nvfx_screen *screen) { struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; - OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 2)); + BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 2); OUT_RING(chan, screen->base.channel->vram->handle); OUT_RING(chan, screen->base.channel->vram->handle); - OUT_RING(chan, RING_3D(0x1450, 1)); + BEGIN_RING(chan, eng3d, 0x1450, 1); OUT_RING(chan, 0x00000004); - OUT_RING(chan, RING_3D(0x1ea4, 3)); + BEGIN_RING(chan, eng3d, 0x1ea4, 3); OUT_RING(chan, 0x00000010); OUT_RING(chan, 0x01000100); OUT_RING(chan, 0xff800006); /* vtxprog output routing */ - OUT_RING(chan, RING_3D(0x1fc4, 1)); + BEGIN_RING(chan, eng3d, 0x1fc4, 1); OUT_RING(chan, 0x06144321); - OUT_RING(chan, RING_3D(0x1fc8, 2)); + BEGIN_RING(chan, eng3d, 0x1fc8, 2); OUT_RING(chan, 0xedcba987); OUT_RING(chan, 0x0000006f); - OUT_RING(chan, RING_3D(0x1fd0, 1)); + BEGIN_RING(chan, eng3d, 0x1fd0, 1); OUT_RING(chan, 0x00171615); - OUT_RING(chan, RING_3D(0x1fd4, 1)); + BEGIN_RING(chan, eng3d, 0x1fd4, 1); OUT_RING(chan, 0x001b1a19); - OUT_RING(chan, RING_3D(0x1ef8, 1)); + BEGIN_RING(chan, 
eng3d, 0x1ef8, 1); OUT_RING(chan, 0x0020ffff); - OUT_RING(chan, RING_3D(0x1d64, 1)); + BEGIN_RING(chan, eng3d, 0x1d64, 1); OUT_RING(chan, 0x01d300d4); - OUT_RING(chan, RING_3D(0x1e94, 1)); + BEGIN_RING(chan, eng3d, 0x1e94, 1); OUT_RING(chan, 0x00000001); - OUT_RING(chan, RING_3D(NV40_3D_MIPMAP_ROUNDING, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_MIPMAP_ROUNDING, 1); OUT_RING(chan, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN); } @@ -571,25 +573,25 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Static eng3d initialisation */ /* note that we just started using the channel, so we must have space in the pushbuffer */ - OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_NOTIFY, 1); OUT_RING(chan, screen->sync->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_TEXTURE0, 2); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->gart->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR1, 1); OUT_RING(chan, chan->vram->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR0, 2); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->vram->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_VTXBUF0, 2); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->gart->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_FENCE, 2); OUT_RING(chan, 0); OUT_RING(chan, screen->query->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_UNK1AC, 2); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->vram->handle); diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 54619037d8..f3dcb205c6 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ 
b/src/gallium/drivers/nvfx/nvfx_state.c @@ -304,7 +304,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, { struct nvfx_context *nvfx = nvfx_context(pipe); - nvfx->constbuf[shader] = buf; + pipe_resource_reference(&nvfx->constbuf[shader], buf); nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0; if (shader == PIPE_SHADER_VERTEX) { diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 501fdd4430..ae9c31418c 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -7,11 +7,11 @@ void nvfx_state_viewport_validate(struct nvfx_context *nvfx) { struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct pipe_viewport_state *vpt = &nvfx->viewport; - WAIT_RING(chan, 11); if(nvfx->render_mode == HW) { - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8); OUT_RINGf(chan, vpt->translate[0]); OUT_RINGf(chan, vpt->translate[1]); OUT_RINGf(chan, vpt->translate[2]); @@ -20,10 +20,10 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx) OUT_RINGf(chan, vpt->scale[1]); OUT_RINGf(chan, vpt->scale[2]); OUT_RINGf(chan, vpt->scale[3]); - OUT_RING(chan, RING_3D(0x1d78, 1)); + BEGIN_RING(chan, eng3d, 0x1d78, 1); OUT_RING(chan, 1); } else { - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8); OUT_RINGf(chan, 0.0f); OUT_RINGf(chan, 0.0f); OUT_RINGf(chan, 0.0f); @@ -32,7 +32,7 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx) OUT_RINGf(chan, 1.0f); OUT_RINGf(chan, 1.0f); OUT_RINGf(chan, 1.0f); - OUT_RING(chan, RING_3D(0x1d78, 1)); + BEGIN_RING(chan, eng3d, 0x1d78, 1); OUT_RING(chan, nvfx->is_nv4x ? 
0x110 : 1); } } @@ -41,6 +41,7 @@ void nvfx_state_scissor_validate(struct nvfx_context *nvfx) { struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; struct pipe_scissor_state *s = &nvfx->scissor; @@ -48,8 +49,7 @@ nvfx_state_scissor_validate(struct nvfx_context *nvfx) return; nvfx->state.scissor_enabled = rast->scissor; - WAIT_RING(chan, 3); - OUT_RING(chan, RING_3D(NV30_3D_SCISSOR_HORIZ, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_SCISSOR_HORIZ, 2); if (nvfx->state.scissor_enabled) { OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx); OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny); @@ -63,12 +63,12 @@ void nvfx_state_sr_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct pipe_stencil_ref *sr = &nvfx->stencil_ref; - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(0), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(0), 1); OUT_RING(chan, sr->ref_value[0]); - OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(1), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(1), 1); OUT_RING(chan, sr->ref_value[1]); } @@ -76,10 +76,10 @@ void nvfx_state_blend_colour_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct pipe_blend_color *bcol = &nvfx->blend_colour; - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_BLEND_COLOR, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_BLEND_COLOR, 1); OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) | (float_to_ubyte(bcol->color[0]) << 16) | (float_to_ubyte(bcol->color[1]) << 8) | @@ -90,9 +90,9 @@ void nvfx_state_stipple_validate(struct nvfx_context *nvfx) { struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - 
WAIT_RING(chan, 33); - OUT_RING(chan, RING_3D(NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32)); + BEGIN_RING(chan, eng3d, NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32); OUT_RINGp(chan, nvfx->stipple, 32); } @@ -100,12 +100,12 @@ static void nvfx_coord_conventions_validate(struct nvfx_context* nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned value = nvfx->hw_fragprog->coord_conventions; if(value & NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED) value |= nvfx->framebuffer.height << NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT; - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_COORD_CONVENTIONS, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_COORD_CONVENTIONS, 1); OUT_RING(chan, value); } @@ -113,6 +113,7 @@ static void nvfx_ucp_validate(struct nvfx_context* nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned enables[7] = { 0, @@ -126,17 +127,15 @@ nvfx_ucp_validate(struct nvfx_context* nvfx) if(!nvfx->use_vp_clipping) { - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1); OUT_RING(chan, 0); - WAIT_RING(chan, 6 * 4 + 1); - OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANE(0, 0), nvfx->clip.nr * 4)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANE(0, 0), + nvfx->clip.nr * 4); OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4); } - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1); OUT_RING(chan, enables[nvfx->clip.nr]); } @@ -144,38 +143,37 @@ static void nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned i; struct nvfx_vertex_program* vp = nvfx->hw_vertprog; if(nvfx->clip.nr != vp->clip_nr) { unsigned idx; - WAIT_RING(chan, 14); /* 
remove last instruction bit */ if(vp->clip_nr >= 0) { idx = vp->nr_insns - 7 + vp->clip_nr; - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1); OUT_RING(chan, vp->exec->start + idx); - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4); OUT_RINGp (chan, vp->insns[idx].data, 4); } /* set last instruction bit */ idx = vp->nr_insns - 7 + nvfx->clip.nr; - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1); OUT_RING(chan, vp->exec->start + idx); - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4); OUT_RINGp(chan, vp->insns[idx].data, 3); OUT_RING(chan, vp->insns[idx].data[3] | 1); vp->clip_nr = nvfx->clip.nr; } // TODO: only do this for the ones changed - WAIT_RING(chan, 6 * 6); for(i = 0; i < nvfx->clip.nr; ++i) { - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5); OUT_RING(chan, vp->data->start + i); OUT_RINGp (chan, nvfx->clip.ucp[i], 4); } @@ -185,6 +183,7 @@ static boolean nvfx_state_validate_common(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned dirty; unsigned still_dirty = 0; int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */ @@ -287,8 +286,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(vp_output != nvfx->hw_vp_output) { - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV40_3D_VP_RESULT_EN, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_VP_RESULT_EN, 1); OUT_RING(chan, vp_output); nvfx->hw_vp_output = vp_output; } @@ -320,8 +318,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0)) { - WAIT_RING(chan, 3); - OUT_RING(chan, RING_3D(NV30_3D_DEPTH_WRITE_ENABLE, 2)); + BEGIN_RING(chan, eng3d, 
NV30_3D_DEPTH_WRITE_ENABLE, 2); OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask); OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled); } @@ -334,10 +331,9 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) // TODO: what about nv30? if(nvfx->is_nv4x) { - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1); OUT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1); OUT_RING(chan, 1); } } @@ -430,7 +426,7 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); warned = TRUE; } - nvfx->pipe.flush(&nvfx->pipe, 0, NULL); + nvfx->pipe.flush(&nvfx->pipe, NULL); nvfx->dirty |= (NVFX_NEW_VIEWPORT | NVFX_NEW_VERTPROG | NVFX_NEW_ARRAYS); diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index 816bb89f2c..f9fed94044 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -96,6 +96,7 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) { struct pipe_framebuffer_state *fb = &nvfx->framebuffer; struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; uint32_t rt_enable, rt_format; int i; unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; @@ -204,11 +205,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) //printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch); - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR0, 1); OUT_RELOC(chan, rt0->bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV30_3D_COLOR0_PITCH, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_COLOR0_PITCH, 2); OUT_RING(chan, 
pitch); OUT_RELOC(chan, rt0->bo, rt0->offset, rt_flags | NOUVEAU_BO_LOW, @@ -216,11 +217,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) } if (rt_enable & NV30_3D_RT_ENABLE_COLOR1) { - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR1, 1); OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV30_3D_COLOR1_OFFSET, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_COLOR1_OFFSET, 2); OUT_RELOC(chan, nvfx->hw_rt[1].bo, nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); @@ -230,68 +231,68 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) if(nvfx->is_nv4x) { if (rt_enable & NV40_3D_RT_ENABLE_COLOR2) { - OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 1); OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV40_3D_COLOR2_OFFSET, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_OFFSET, 1); OUT_RELOC(chan, nvfx->hw_rt[2].bo, nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RING(chan, RING_3D(NV40_3D_COLOR2_PITCH, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_PITCH, 1); OUT_RING(chan, nvfx->hw_rt[2].pitch); } if (rt_enable & NV40_3D_RT_ENABLE_COLOR3) { - OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR3, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR3, 1); OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV40_3D_COLOR3_OFFSET, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_OFFSET, 1); OUT_RELOC(chan, nvfx->hw_rt[3].bo, nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RING(chan, RING_3D(NV40_3D_COLOR3_PITCH, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_PITCH, 1); OUT_RING(chan, nvfx->hw_rt[3].pitch); } } if (fb->zsbuf) { - OUT_RING(chan, RING_3D(NV30_3D_DMA_ZETA, 1)); + 
BEGIN_RING(chan, eng3d, NV30_3D_DMA_ZETA, 1); OUT_RELOC(chan, nvfx->hw_zeta.bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV30_3D_ZETA_OFFSET, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_ZETA_OFFSET, 1); /* TODO: reverse engineer LMA */ OUT_RELOC(chan, nvfx->hw_zeta.bo, nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); if(nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1); OUT_RING(chan, nvfx->hw_zeta.pitch); } } else if(nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1); OUT_RING(chan, 64); } - OUT_RING(chan, RING_3D(NV30_3D_RT_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_RT_ENABLE, 1); OUT_RING(chan, rt_enable); - OUT_RING(chan, RING_3D(NV30_3D_RT_HORIZ, 3)); + BEGIN_RING(chan, eng3d, NV30_3D_RT_HORIZ, 3); OUT_RING(chan, (w << 16) | 0); OUT_RING(chan, (h << 16) | 0); OUT_RING(chan, rt_format); - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_HORIZ, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_HORIZ, 2); OUT_RING(chan, (w << 16) | 0); OUT_RING(chan, (h << 16) | 0); - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2); OUT_RING(chan, ((w - 1) << 16) | 0); OUT_RING(chan, ((h - 1) << 16) | 0); if(!nvfx->is_nv4x) { /* Wonder why this is needed, context should all be set to zero on init */ /* TODO: we can most likely remove this, after putting it in context init */ - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TX_ORIGIN, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TX_ORIGIN, 1); OUT_RING(chan, 0); } nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER; diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index 6fd6c47081..be31853d71 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -168,8 +168,8 @@ nvfx_get_blitter(struct 
pipe_context* pipe, int copy) if(nvfx->query && !nvfx->blitters_in_use) { struct nouveau_channel* chan = nvfx->screen->base.channel; - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); OUT_RING(chan, 0); } @@ -209,8 +209,8 @@ nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter) if(nvfx->query && !nvfx->blitters_in_use) { struct nouveau_channel* chan = nvfx->screen->base.channel; - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); OUT_RING(chan, 1); } } diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 597664e771..b72379d653 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -2,6 +2,7 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_transfer.h" #include "translate/translate.h" #include "nvfx_context.h" @@ -9,8 +10,7 @@ #include "nvfx_resource.h" #include "nouveau/nouveau_channel.h" - -#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nv04_pushbuf.h" static inline unsigned util_guess_unique_indices_count(unsigned mode, unsigned indices) @@ -247,6 +247,7 @@ boolean nvfx_vbo_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; int i; int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; @@ -262,11 +263,11 @@ nvfx_vbo_validate(struct nvfx_context *nvfx) struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); float v[4]; ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0); - nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp); + 
nvfx_emit_vtx_attr(chan, eng3d, ve->idx, v, ve->ncomp); } - OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements)); + BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements); if(nvfx->use_vertex_buffers) { unsigned idx = 0; @@ -297,12 +298,12 @@ nvfx_vbo_validate(struct nvfx_context *nvfx) unsigned i; /* seems to be some kind of cache flushing */ for(i = 0; i < 3; ++i) { - OUT_RING(chan, RING_3D(0x1718, 1)); + BEGIN_RING(chan, eng3d, 0x1718, 1); OUT_RING(chan, 0); } } - OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements)); + BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements); if(nvfx->use_vertex_buffers) { unsigned idx = 0; @@ -330,7 +331,7 @@ nvfx_vbo_validate(struct nvfx_context *nvfx) OUT_RING(chan, 0); } - OUT_RING(chan, RING_3D(0x1710, 1)); + BEGIN_RING(chan, eng3d, 0x1710, 1); OUT_RING(chan, 0); nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; @@ -342,15 +343,14 @@ void nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned num_outputs = nvfx->vertprog->draw_elements; int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr); if (!elements) return; - WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2); - - OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements)); + BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements); for(unsigned i = 0; i < num_outputs; ++i) OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT); for(unsigned i = num_outputs; i < elements; ++i) @@ -360,16 +360,16 @@ nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx) unsigned i; /* seems to be some kind of cache flushing */ for(i = 0; i < 3; ++i) { - OUT_RING(chan, RING_3D(0x1718, 1)); + BEGIN_RING(chan, eng3d, 0x1718, 1); OUT_RING(chan, 0); } } - OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements)); + BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements); for (unsigned i = 0; i < elements; i++) OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(0x1710, 1)); + 
BEGIN_RING(chan, eng3d, 0x1710, 1); OUT_RING(chan, 0); nvfx->hw_vtxelt_nr = num_outputs; @@ -592,18 +592,10 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, { struct nvfx_context *nvfx = nvfx_context(pipe); - for(unsigned i = 0; i < count; ++i) - { - pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer); - nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset; - nvfx->vtxbuf[i].max_index = vb[i].max_index; - nvfx->vtxbuf[i].stride = vb[i].stride; - } - - for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i) - pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0); + util_copy_vertex_buffers(nvfx->vtxbuf, + &nvfx->vtxbuf_nr, + vb, count); - nvfx->vtxbuf_nr = count; nvfx->use_vertex_buffers = -1; nvfx->draw_dirty |= NVFX_NEW_ARRAYS; } @@ -640,4 +632,6 @@ nvfx_init_vbo_functions(struct nvfx_context *nvfx) nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; + + nvfx->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index e543fda50e..a11941f3d5 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -1182,6 +1182,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) { struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog; struct nvfx_vertex_program* vp; struct pipe_resource *constbuf; @@ -1341,7 +1342,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) } */ - WAIT_RING(chan, 6 * vp->nr_consts); for (i = nvfx->use_vp_clipping ? 
6 : 0; i < vp->nr_consts; i++) { struct nvfx_vertex_program_data *vpd = &vp->consts[i]; @@ -1356,7 +1356,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) //printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]); - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5); OUT_RING(chan, i + vp->data->start); OUT_RINGp(chan, (uint32_t *)vpd->value, 4); } @@ -1364,11 +1364,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) /* Upload vtxprog */ if (upload_code) { - WAIT_RING(chan, 2 + 5 * vp->nr_insns); - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1); OUT_RING(chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4); //printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]); OUT_RINGp(chan, vp->insns[i].data, 4); } @@ -1377,11 +1376,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) if(nvfx->dirty & (NVFX_NEW_VERTPROG)) { - WAIT_RING(chan, 6); - OUT_RING(chan, RING_3D(NV30_3D_VP_START_FROM_ID, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_START_FROM_ID, 1); OUT_RING(chan, vp->exec->start); if(nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV40_3D_VP_ATTRIB_EN, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_VP_ATTRIB_EN, 1); OUT_RING(chan, vp->ir); } } diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 6e886433bc..37b635fd12 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -22,7 +22,6 @@ #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_texture.h" #include "r300_winsys.h" @@ -55,11 +54,12 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o 
util_blitter_save_viewport(r300->blitter, &r300->viewport); util_blitter_save_clip(r300->blitter, (struct pipe_clip_state*)r300->clip_state.state); util_blitter_save_vertex_elements(r300->blitter, r300->velems); - util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count, - r300->vertex_buffer); + util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers, + r300->vbuf_mgr->vertex_buffer); - if (op & (R300_CLEAR_SURFACE | R300_COPY)) + if (op & (R300_CLEAR_SURFACE | R300_COPY)) { util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); + } if (op & R300_COPY) { struct r300_textures_state* state = @@ -108,6 +108,22 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, return r300_surface(fb->cbufs[0])->cbzb_allowed; } +static boolean r300_fast_zclear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level]; +} + +static boolean r300_hiz_clear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level]; +} + static uint32_t r300_depth_clear_value(enum pipe_format format, double depth, unsigned stencil) { @@ -125,6 +141,13 @@ static uint32_t r300_depth_clear_value(enum pipe_format format, } } +static uint32_t r300_hiz_clear_value(double depth) +{ + uint32_t r = (uint32_t)(CLAMP(depth, 0, 1) * 255.5); + assert(r <= 255); + return r | (r << 8) | (r << 16) | (r << 24); +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -132,37 +155,46 @@ static void r300_clear(struct pipe_context* pipe, double depth, unsigned stencil) { - /* My notes about fastfill: + /* My notes about Zbuffer compression: * - * 1) Only the zbuffer is cleared. 
+ * 1) The zbuffer must be micro-tiled and whole microtiles must be + * written if compression is enabled. If microtiling is disabled, + * it locks up. * - * 2) The zbuffer must be micro-tiled and whole microtiles must be - * written. If microtiling is disabled, it locks up. + * 2) There is ZMASK RAM which contains a compressed zbuffer. + * Each dword of the Z Mask contains compression information + * for 16 4x4 pixel tiles, that is 2 bits for each tile. + * On chips with 2 Z pipes, every other dword maps to a different + * pipe. On newer chipsets, there is a new compression mode + * with 8x8 pixel tiles per 2 bits. * - * 3) There is Z Mask RAM which contains a compressed zbuffer and - * it interacts with fastfill. We should figure out how to use it - * to get more performance. - * This is what we know about the Z Mask: + * 3) The FASTFILL bit has nothing to do with filling. It only tells hw + * it should look in the ZMASK RAM first before fetching from a real + * zbuffer. * - * Each dword of the Z Mask contains compression information - * for 16 4x4 pixel blocks, that is 2 bits for each block. - * On chips with 2 Z pipes, every other dword maps to a different - * pipe. + * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned + * during zbuffer reads instead of the value that is actually stored + * in the zbuffer memory. A pixel is in a cleared state when its ZMASK + * is equal to 0. Therefore, if you clear ZMASK with zeros, you may + * leave the zbuffer memory uninitialized, but then you must enable + * compression, so that the ZMASK RAM is actually used. * - * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must - * be equal to 0. (clear the Z Mask RAM with zeros) + * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed + * during zbuffer updates. 
A special decompressing operation should be + * used to fully decompress a zbuffer, which basically just stores all + * compressed tiles in ZMASK to the zbuffer memory. * - * 5) For 16-bit zbuffer, compression causes a hung with one or + * 6) For a 16-bit zbuffer, compression causes a hung with one or * two samples and should not be used. * - * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears + * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears * to avoid needless decompression. * - * 7) Fastfill must not be used if reading of compressed Z data is disabled + * 8) Fastfill must not be used if reading of compressed Z data is disabled * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE), * i.e. it cannot be used to compress the zbuffer. * - * 8) ZB_CB_CLEAR does not interact with fastfill in any way. + * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way. * * - Marek */ @@ -172,8 +204,6 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_texture *zstex = - fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); @@ -182,16 +212,18 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. 
*/ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { - hyperz_dcv = hyperz->zb_depthclearvalue = - r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + if (r300_fast_zclear_allowed(r300)) { + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); - r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); - if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) { r300_mark_atom_dirty(r300, &r300->zmask_clear); buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - if (zstex->hiz_mem[fb->zsbuf->u.tex.level]) + + if (r300_hiz_clear_allowed(r300)) { + r300->hiz_clear_value = r300_hiz_clear_value(depth); r300_mark_atom_dirty(r300, &r300->hiz_clear); + } } /* Enable CBZB clear. */ @@ -205,7 +237,7 @@ static void r300_clear(struct pipe_context* pipe, height = surf->cbzb_height; r300->cbzb_clear = TRUE; - r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Clear. */ @@ -218,26 +250,28 @@ static void r300_clear(struct pipe_context* pipe, fb->nr_cbufs, buffers, rgba, depth, stencil); r300_blitter_end(r300); - } else if (r300->zmask_clear.dirty) { - /* Just clear zmask and hiz now, this does not use a standard draw + } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) { + /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ unsigned dwords; /* Calculate zmask_clear and hiz_clear atom sizes. */ r300_update_hyperz_state(r300); - dwords = r300->zmask_clear.size + + dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + r300_get_num_cs_end_dwords(r300); /* Reserve CS space. */ if (dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { - r300->context.flush(&r300->context, 0, NULL); + r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); } /* Emit clear packets. 
*/ - r300_emit_zmask_clear(r300, r300->zmask_clear.size, - r300->zmask_clear.state); - r300->zmask_clear.dirty = FALSE; + if (r300->zmask_clear.dirty) { + r300_emit_zmask_clear(r300, r300->zmask_clear.size, + r300->zmask_clear.state); + r300->zmask_clear.dirty = FALSE; + } if (r300->hiz_clear.dirty) { r300_emit_hiz_clear(r300, r300->hiz_clear.size, r300->hiz_clear.state); @@ -251,16 +285,14 @@ static void r300_clear(struct pipe_context* pipe, if (r300->cbzb_clear) { r300->cbzb_clear = FALSE; hyperz->zb_depthclearvalue = hyperz_dcv; - r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Enable fastfill and/or hiz. * * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update - * looks if zmask/hiz is in use and enables fastfill accordingly. */ - if (zstex && - (zstex->zmask_in_use[fb->zsbuf->u.tex.level] || - zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { + * looks if zmask/hiz is in use and programs hardware accordingly. */ + if (r300->zmask_in_use || r300->hiz_in_use) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -274,10 +306,16 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); + r300->hyperz_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_render_target(r300->blitter, dst, rgba, dstx, dsty, width, height); r300_blitter_end(r300); + + r300->hyperz_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } /* Clear a region of a depth stencil surface. 
*/ @@ -290,42 +328,70 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, unsigned width, unsigned height) { struct r300_context *r300 = r300_context(pipe); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + if (r300->zmask_in_use && !r300->hyperz_locked) { + if (fb->zsbuf->texture == dst->texture) { + r300_decompress_zmask(r300); + } else { + r300->hyperz_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + } r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); r300_blitter_end(r300); + + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } } -/* Flush a depth stencil buffer. */ -void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned level, - unsigned layer) +void r300_decompress_zmask(struct r300_context *r300) { - struct r300_context *r300 = r300_context(pipe); - struct pipe_surface *dstsurf, surf_tmpl; - struct r300_texture *tex = r300_texture(dst); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; - if (!tex->zmask_mem[level]) - return; - if (!tex->zmask_in_use[level]) + if (!r300->zmask_in_use || r300->hyperz_locked) return; - surf_tmpl.format = dst->format; - surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - surf_tmpl.u.tex.level = level; - surf_tmpl.u.tex.first_layer = layer; - surf_tmpl.u.tex.last_layer = layer; - dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl); + r300->zmask_decompress = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); - r300->z_decomp_rd = TRUE; - r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + r300_blitter_begin(r300, R300_CLEAR); + util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0, + r300->dsa_decompress_zmask); 
r300_blitter_end(r300); - r300->z_decomp_rd = FALSE; - tex->zmask_in_use[level] = FALSE; + r300->zmask_decompress = FALSE; + r300->zmask_in_use = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); +} + +void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) +{ + struct pipe_framebuffer_state fb = {0}; + fb.width = r300->locked_zbuffer->width; + fb.height = r300->locked_zbuffer->height; + fb.nr_cbufs = 0; + fb.zsbuf = r300->locked_zbuffer; + + r300->context.set_framebuffer_state(&r300->context, &fb); + r300_decompress_zmask(r300); +} + +void r300_decompress_zmask_locked(struct r300_context *r300) +{ + struct pipe_framebuffer_state saved_fb = {0}; + + util_copy_framebuffer_state(&saved_fb, r300->fb_state.state); + r300_decompress_zmask_locked_unsafe(r300); + r300->context.set_framebuffer_state(&r300->context, &saved_fb); + util_unreference_framebuffer_state(&saved_fb); } /* Copy a block of pixels from one surface to another using HW. */ @@ -340,8 +406,6 @@ static void r300_hw_copy_region(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300_blitter_begin(r300, R300_COPY); - - /* Do a copy */ util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box, TRUE); r300_blitter_end(r300); @@ -356,54 +420,103 @@ static void r300_resource_copy_region(struct pipe_context *pipe, unsigned src_level, const struct pipe_box *src_box) { - enum pipe_format old_format = dst->format; - enum pipe_format new_format = old_format; - boolean is_depth; - if (!pipe->screen->is_format_supported(pipe->screen, - old_format, src->target, - src->nr_samples, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_SAMPLER_VIEW, 0) && - util_format_is_plain(old_format)) { - switch (util_format_get_blocksize(old_format)) { + struct r300_context *r300 = r300_context(pipe); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct pipe_resource old_src = *src; + struct pipe_resource old_dst = 
*dst; + struct pipe_resource new_src = old_src; + struct pipe_resource new_dst = old_dst; + const struct util_format_description *desc = + util_format_description(dst->format); + struct pipe_box box; + + if (r300->zmask_in_use && !r300->hyperz_locked) { + if (fb->zsbuf->texture == src || + fb->zsbuf->texture == dst) { + r300_decompress_zmask(r300); + } else { + r300->hyperz_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + } + + /* Handle non-renderable plain formats. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && + (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || + !pipe->screen->is_format_supported(pipe->screen, + src->format, src->target, + src->nr_samples, + PIPE_BIND_SAMPLER_VIEW) || + !pipe->screen->is_format_supported(pipe->screen, + dst->format, dst->target, + dst->nr_samples, + PIPE_BIND_RENDER_TARGET))) { + switch (util_format_get_blocksize(old_dst.format)) { case 1: - new_format = PIPE_FORMAT_I8_UNORM; + new_dst.format = PIPE_FORMAT_I8_UNORM; break; case 2: - new_format = PIPE_FORMAT_B4G4R4A4_UNORM; + new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM; break; case 4: - new_format = PIPE_FORMAT_B8G8R8A8_UNORM; + new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; break; case 8: - new_format = PIPE_FORMAT_R16G16B16A16_UNORM; + new_dst.format = PIPE_FORMAT_R16G16B16A16_UNORM; break; default: debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n" "r300: surface_copy: Software fallback doesn't work for tiled textures.\n", - util_format_short_name(old_format)); + util_format_short_name(dst->format)); } + new_src.format = new_dst.format; } - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - if (is_depth) { - r300_flush_depth_stencil(pipe, src, src_level, src_box->z); - } - if (old_format != new_format) { - r300_texture_reinterpret_format(pipe->screen, - dst, new_format); - r300_texture_reinterpret_format(pipe->screen, - src, new_format); + /* Handle compressed formats. 
*/ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC || + desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + switch (util_format_get_blocksize(old_dst.format)) { + case 8: + /* 1 pixel = 4 bits, + * we set 1 pixel = 2 bytes ===> 4 times larger pixels. */ + new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM; + break; + case 16: + /* 1 pixel = 8 bits, + * we set 1 pixel = 4 bytes ===> 4 times larger pixels. */ + new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + } + + /* Since the pixels are 4 times larger, we must decrease + * the image size and the coordinates 4 times. */ + new_src.format = new_dst.format; + new_dst.height0 = (new_dst.height0 + 3) / 4; + new_src.height0 = (new_src.height0 + 3) / 4; + dsty /= 4; + box = *src_box; + box.y /= 4; + box.height = (box.height + 3) / 4; + src_box = &box; } + if (old_src.format != new_src.format) + r300_resource_set_properties(pipe->screen, src, 0, &new_src); + if (old_dst.format != new_dst.format) + r300_resource_set_properties(pipe->screen, dst, 0, &new_dst); + r300_hw_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); - if (old_format != new_format) { - r300_texture_reinterpret_format(pipe->screen, - dst, old_format); - r300_texture_reinterpret_format(pipe->screen, - src, old_format); + if (old_src.format != new_src.format) + r300_resource_set_properties(pipe->screen, src, 0, &old_src); + if (old_dst.format != new_dst.format) + r300_resource_set_properties(pipe->screen, dst, 0, &old_dst); + + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } } diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h index 9d3d4fc1b1..b373937a1f 100644 --- a/src/gallium/drivers/r300/r300_cb.h +++ b/src/gallium/drivers/r300/r300_cb.h @@ -61,40 +61,52 @@ * that they neatly hide away, and don't have the cost of function setup, so * we're going to use them. 
*/ -#ifdef DEBUG -#define CB_DEBUG(x) x -#else -#define CB_DEBUG(x) -#endif - - /** * Command buffer setup. */ +#ifdef DEBUG + #define CB_LOCALS \ - CB_DEBUG(int cs_count = 0;) \ + int cs_count = 0; \ uint32_t *cs_ptr = NULL; \ - CB_DEBUG((void) cs_count;) (void) cs_ptr; + (void) cs_count; (void) cs_ptr -#define NEW_CB(ptr, size) do { \ - assert(sizeof(*ptr) == sizeof(uint32_t)); \ - cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \ - CB_DEBUG(cs_count = size;) \ +#define BEGIN_CB(ptr, size) do { \ + assert(sizeof(*(ptr)) == sizeof(uint32_t)); \ + cs_count = (size); \ + cs_ptr = (ptr); \ } while (0) -#define BEGIN_CB(ptr, size) do { \ - assert(sizeof(*ptr) == sizeof(uint32_t)); \ - cs_ptr = ptr; \ - CB_DEBUG(cs_count = size;) \ +#define NEW_CB(ptr, size) \ + do { \ + assert(sizeof(*(ptr)) == sizeof(uint32_t)); \ + cs_count = (size); \ + cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \ } while (0) #define END_CB do { \ - CB_DEBUG(if (cs_count != 0) \ + if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ - cs_count, __FUNCTION__, __FILE__, __LINE__);) \ + cs_count, __FUNCTION__, __FILE__, __LINE__); \ } while (0) +#define CB_USED_DW(x) cs_count -= x + +#else + +#define CB_LOCALS \ + uint32_t *cs_ptr = NULL; (void) cs_ptr + +#define NEW_CB(ptr, size) \ + cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)) + +#define BEGIN_CB(ptr, size) cs_ptr = (ptr) +#define END_CB +#define CB_USED_DW(x) + +#endif + /** * Storing pure DWORDs. 
@@ -103,13 +115,13 @@ #define OUT_CB(value) do { \ *cs_ptr = (value); \ cs_ptr++; \ - CB_DEBUG(cs_count--;) \ + CB_USED_DW(1); \ } while (0) #define OUT_CB_TABLE(values, count) do { \ memcpy(cs_ptr, values, count * sizeof(uint32_t)); \ cs_ptr += count; \ - CB_DEBUG(cs_count -= count;) \ + CB_USED_DW(count); \ } while (0) #define OUT_CB_32F(value) \ diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 583e981a4d..990acea9f4 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -241,7 +241,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5A42: caps->family = CHIP_FAMILY_RS400; caps->has_tcl = FALSE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A61: @@ -256,8 +255,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x793F: @@ -266,8 +263,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x796C: @@ -277,8 +272,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7100: @@ -366,7 +359,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV530; caps->num_vert_fpus = 5; caps->is_r500 = TRUE; - /*caps->hiz_ram = RV530_HIZ_LIMIT;*/ + caps->hiz_ram = RV530_HIZ_LIMIT; caps->zmask_ram = PIPE_ZMASK_SIZE; break; @@ -424,5 +417,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) } caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; + caps->z_compress = caps->is_rv350 ? 
R300_ZCOMP_8X8 : R300_ZCOMP_4X4; caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index f2035d2009..68943d561b 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -33,6 +33,13 @@ #define PIPE_ZMASK_SIZE 4096 #define RV3xx_ZMASK_SIZE 5120 +/* The size of a compressed tile. Each compressed tile takes 2 bits + * in the ZMASK RAM, so there is always 16 tiles per one dword. */ +enum r300_zmask_compression { + R300_ZCOMP_4X4 = 4, + R300_ZCOMP_8X8 = 8 +}; + /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { @@ -50,10 +57,12 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; - /* Some chipsets do not have HiZ RAM - other have varying amounts . */ + /* Some chipsets do not have HiZ RAM - other have varying amounts. */ int hiz_ram; - /* some chipsets have zmask ram per pipe some don't */ + /* Some chipsets have zmask ram per pipe some don't. */ int zmask_ram; + /* Compression mode for ZMASK. */ + enum r300_zmask_compression z_compress; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds @@ -81,8 +90,6 @@ struct r300_capabilities { boolean high_second_pipe; /* DXTC texture swizzling. */ boolean dxtc_swizzle; - /* Index bias (AKA index offset). */ - boolean index_bias_supported; }; /* Enumerations for legibility and telling which card we're running on. 
*/ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 67b011a145..720d666d98 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,29 +30,28 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" -#include <inttypes.h> - static void r300_update_num_contexts(struct r300_screen *r300screen, int diff) { + pipe_mutex_lock(r300screen->num_contexts_mutex); if (diff > 0) { - p_atomic_inc(&r300screen->num_contexts); + r300screen->num_contexts++; if (r300screen->num_contexts > 1) util_slab_set_thread_safety(&r300screen->pool_buffers, UTIL_SLAB_MULTITHREADED); } else { - p_atomic_dec(&r300screen->num_contexts); + r300screen->num_contexts--; if (r300screen->num_contexts <= 1) util_slab_set_thread_safety(&r300screen->pool_buffers, UTIL_SLAB_SINGLETHREADED); } + pipe_mutex_unlock(r300screen->num_contexts_mutex); } static void r300_release_referenced_objects(struct r300_context *r300) @@ -79,22 +78,18 @@ static void r300_release_referenced_objects(struct r300_context *r300) NULL); } - /* The dummy VBO. */ + /* Manually-created vertex buffers. */ pipe_resource_reference(&r300->dummy_vb, NULL); - - /* The SWTCL VBO. */ pipe_resource_reference(&r300->vbo, NULL); - /* Vertex buffers. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - } - /* If there are any queries pending or not destroyed, remove them now. 
*/ foreach_s(query, temp, &r300->query_list) { remove_from_list(query); FREE(query); } + + r300->context.delete_depth_stencil_alpha_state(&r300->context, + r300->dsa_decompress_zmask); } static void r300_destroy_context(struct pipe_context* context) @@ -106,20 +101,12 @@ static void r300_destroy_context(struct pipe_context* context) if (r300->draw) draw_destroy(r300->draw); - if (r300->upload_vb) - u_upload_destroy(r300->upload_vb); - if (r300->upload_ib) - u_upload_destroy(r300->upload_ib); - - if (r300->tran.translate_cache) - translate_cache_destroy(r300->tran.translate_cache); + if (r300->vbuf_mgr) + u_vbuf_mgr_destroy(r300->vbuf_mgr); /* XXX: This function assumes r300->query_list was initialized */ r300_release_referenced_objects(r300); - if (r300->zmask_mm) - r300_hyperz_destroy_mm(r300); - if (r300->cs) r300->rws->cs_destroy(r300->cs); @@ -152,11 +139,11 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300); } -void r300_flush_cb(void *data) +static void r300_flush_callback(void *data, unsigned flags) { struct r300_context* const cs_context_copy = data; - cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); + r300_flush(&cs_context_copy->context, flags, NULL); } #define R300_INIT_ATOM(atomname, atomsize) \ @@ -180,7 +167,6 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0); boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; @@ -206,18 +192,18 @@ static boolean r300_setup_atoms(struct r300_context* r300) /* ZB (unpipelined), SC. */ R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. */ - R300_INIT_ATOM(dsa_state, is_r500 ? 
8 : 6); + R300_INIT_ATOM(dsa_state, is_r500 ? (drm_2_6_0 ? 10 : 8) : 6); /* RB3D. */ R300_INIT_ATOM(blend_state, 8); R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); /* SC. */ R300_INIT_ATOM(scissor_state, 3); /* GB, FG, GA, SU, SC, RB3D. */ - R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0)); + R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0)); /* VAP. */ R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(pvs_flush, 2); - R300_INIT_ATOM(vap_invariant_state, 9); + R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9); R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); @@ -226,7 +212,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(rs_block_state, 0); R300_INIT_ATOM(rs_state, 0); /* SC, US. */ - R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0)); + R300_INIT_ATOM(fb_state_pipelined, 8); /* US. */ R300_INIT_ATOM(fs, 0); R300_INIT_ATOM(fs_rc_constant_state, 0); @@ -237,9 +223,9 @@ static boolean r300_setup_atoms(struct r300_context* r300) if (can_hyperz) { /* HiZ Clear */ if (has_hiz_ram) - R300_INIT_ATOM(hiz_clear, 0); + R300_INIT_ATOM(hiz_clear, 4); /* zmask clear */ - R300_INIT_ATOM(zmask_clear, 0); + R300_INIT_ATOM(zmask_clear, 4); } /* ZB (unpipelined), SU. */ R300_INIT_ATOM(query_start, 4); @@ -341,7 +327,7 @@ static void r300_init_states(struct pipe_context *pipe) /* Initialize the VAP invariant state. 
*/ { - BEGIN_CB(vap_invariant->cb, 9); + BEGIN_CB(vap_invariant->cb, r300->vap_invariant_state.size); OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); OUT_CB_32F(1.0); @@ -349,6 +335,10 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_32F(1.0); OUT_CB_32F(1.0); OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); + + if (r300->screen->caps.is_r500) { + OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0); + } END_CB; } @@ -357,17 +347,22 @@ static void r300_init_states(struct pipe_context *pipe) BEGIN_CB(invariant->cb, r300->invariant_state.size); OUT_CB_REG(R300_GB_SELECT, 0); OUT_CB_REG(R300_FG_FOG_BLEND, 0); - OUT_CB_REG(R300_GA_ROUND_MODE, 1); OUT_CB_REG(R300_GA_OFFSET, 0); OUT_CB_REG(R300_SU_TEX_WRAP, 0); OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0); OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525); + OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff); if (r300->screen->caps.is_rv350) { OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + + if (r300->screen->caps.is_r500) { + OUT_CB_REG(R500_GA_COLOR_CONTROL_PS3, 0); + OUT_CB_REG(R500_SU_TEX_WRAP_PS3, 0); + } END_CB; } @@ -443,39 +438,23 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_state_functions(r300); r300_init_resource_functions(r300); + r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16, + PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER, + U_VERTEX_FETCH_DWORD_ALIGNED); + if (!r300->vbuf_mgr) + goto fail; + r300->blitter = util_blitter_create(&r300->context); if (r300->blitter == NULL) goto fail; /* Render functions must be initialized after blitter. 
*/ r300_init_render_functions(r300); - - rws->cs_set_flush(r300->cs, r300_flush_cb, r300); - - /* setup hyper-z mm */ - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - if (!r300_hyperz_init_mm(r300)) - goto fail; - - r300->upload_ib = u_upload_create(&r300->context, - 32 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - - if (r300->upload_ib == NULL) - goto fail; - - r300->upload_vb = u_upload_create(&r300->context, - 128 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (r300->upload_vb == NULL) - goto fail; - - r300->tran.translate_cache = translate_cache_create(); - if (r300->tran.translate_cache == NULL) - goto fail; - r300_init_states(&r300->context); + rws->cs_set_flush(r300->cs, r300_flush_callback, r300); + /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this * dummy texture there. */ @@ -502,7 +481,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, } { - struct pipe_resource vb = {}; + struct pipe_resource vb; + memset(&vb, 0, sizeof(vb)); vb.target = PIPE_BUFFER; vb.format = PIPE_FORMAT_R8_UNORM; vb.bind = PIPE_BIND_VERTEX_BUFFER; @@ -514,36 +494,45 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->dummy_vb = screen->resource_create(screen, &vb); } + { + struct pipe_depth_stencil_alpha_state dsa; + memset(&dsa, 0, sizeof(dsa)); + dsa.depth.writemask = 1; + + r300->dsa_decompress_zmask = + r300->context.create_depth_stencil_alpha_state(&r300->context, + &dsa); + } + + /* Print driver info. 
*/ +#ifdef DEBUG + { +#else + if (DBG_ON(r300, DBG_INFO)) { +#endif + fprintf(stderr, + "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" + "r300: GART size: %d MB, VRAM size: %d MB\n" + "r300: AA compression: %s, Z compression: %s, HiZ: %s\n", + rws->get_value(rws, R300_VID_DRM_MAJOR), + rws->get_value(rws, R300_VID_DRM_MINOR), + rws->get_value(rws, R300_VID_DRM_PATCHLEVEL), + screen->get_name(screen), + rws->get_value(rws, R300_VID_PCI_ID), + rws->get_value(rws, R300_VID_GB_PIPES), + rws->get_value(rws, R300_VID_Z_PIPES), + rws->get_value(rws, R300_VID_GART_SIZE) >> 20, + rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20, + rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) && + r300->screen->caps.zmask_ram ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) && + r300->screen->caps.hiz_ram ? "YES" : "NO"); + } + return &r300->context; - fail: +fail: r300_destroy_context(&r300->context); return NULL; } - -void r300_finish(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb; - unsigned i; - - /* This is a preliminary implementation of glFinish. - * - * The ideal implementation should use something like EmitIrqLocked and - * WaitIrq, or better, real fences. 
- */ - if (r300->fb_state.state) { - fb = r300->fb_state.state; - - for (i = 0; i < fb->nr_cbufs; i++) { - if (fb->cbufs[i]->texture) { - r300->rws->buffer_wait(r300->rws, - r300_texture(fb->cbufs[i]->texture)->buffer); - return; - } - } - if (fb->zsbuf && fb->zsbuf->texture) { - r300->rws->buffer_wait(r300->rws, - r300_texture(fb->zsbuf->texture)->buffer); - } - } -} diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 39dcde0610..e395f41290 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,11 +30,11 @@ #include "pipe/p_context.h" #include "util/u_inlines.h" #include "util/u_transfer.h" - -#include "translate/translate_cache.h" +#include "util/u_vbuf_mgr.h" #include "r300_defines.h" #include "r300_screen.h" +#include "r300_winsys.h" struct u_upload_mgr; struct r300_context; @@ -65,11 +65,15 @@ struct r300_aa_state { }; struct r300_blend_state { - uint32_t cb[8]; + struct pipe_blend_state state; + + uint32_t cb_clamp[8]; + uint32_t cb_noclamp[8]; uint32_t cb_no_readwrite[8]; }; struct r300_blend_color_state { + struct pipe_blend_color state; uint32_t cb[3]; }; @@ -91,9 +95,24 @@ struct r300_dsa_state { uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */ uint32_t cb_reg; uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ + uint32_t cb_reg1; + uint32_t alpha_value; /* R500_FG_ALPHA_VALUE: 0x4be0 */ + + /* The same, but for FP16 alpha test. 
*/ + uint32_t cb_begin_fp16; + uint32_t alpha_function_fp16; /* R300_FG_ALPHA_FUNC: 0x4bd4 */ + uint32_t cb_reg_seq_fp16; + uint32_t z_buffer_control_fp16; /* R300_ZB_CNTL: 0x4f00 */ + uint32_t z_stencil_control_fp16; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */ + uint32_t stencil_ref_mask_fp16; /* R300_ZB_STENCILREFMASK: 0x4f08 */ + uint32_t cb_reg_fp16; + uint32_t stencil_ref_bf_fp16; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ + uint32_t cb_reg1_fp16; + uint32_t alpha_value_fp16; /* R500_FG_ALPHA_VALUE: 0x4be0 */ /* The second command buffer disables zbuffer reads and writes. */ - uint32_t cb_no_readwrite[8]; + uint32_t cb_zb_no_readwrite[10]; + uint32_t cb_fp16_zb_no_readwrite[10]; /* Whether a two-sided stencil is enabled. */ boolean two_sided; @@ -102,7 +121,6 @@ struct r300_dsa_state { }; struct r300_hyperz_state { - int current_func; /* -1 after a clear before first op */ int flush; /* This is actually a command buffer with named dwords. */ uint32_t cb_flush_begin; @@ -121,7 +139,7 @@ struct r300_gpu_flush { uint32_t cb_flush_clean[6]; }; -#define RS_STATE_MAIN_SIZE 23 +#define RS_STATE_MAIN_SIZE 25 struct r300_rs_state { /* Original rasterizer state. */ @@ -189,11 +207,6 @@ struct r300_sampler_view { uint32_t texcache_region; }; -struct r300_texture_fb_state { - uint32_t pitch[R300_MAX_TEXTURE_LEVELS]; /* COLORPITCH or DEPTHPITCH. */ - uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT */ -}; - struct r300_texture_sampler_state { struct r300_texture_format_state format; uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ @@ -225,11 +238,11 @@ struct r300_vertex_stream_state { }; struct r300_invariant_state { - uint32_t cb[20]; + uint32_t cb[24]; }; struct r300_vap_invariant_state { - uint32_t cb[9]; + uint32_t cb[11]; }; struct r300_viewport_state { @@ -273,14 +286,12 @@ struct r300_query { /* How many results have been written, in dwords. It's incremented * after end_query and flush. 
*/ unsigned num_results; - /* if we've flushed the query */ - boolean flushed; /* if begin has been emitted */ boolean begin_emitted; /* The buffer where query results are stored. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; /* The size of the buffer. */ unsigned buffer_size; /* The domain of the buffer. */ @@ -291,33 +302,19 @@ struct r300_query { struct r300_query* next; }; -/* Fence object. - * - * This is a fake fence. Instead of syncing with the fence, we sync - * with the context, which is inefficient but compliant. - * - * This is not a subclass of pipe_fence_handle because pipe_fence_handle is - * never actually fully defined. So, rather than have it as a member, and do - * subclass-style casting, we treat pipe_fence_handle as an opaque, and just - * trust that our state tracker does not ever mess up fence objects. - */ -struct r300_fence { - struct pipe_reference reference; - struct r300_context *ctx; - boolean signalled; -}; - struct r300_surface { struct pipe_surface base; /* Winsys buffer backing the texture. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; enum r300_buffer_domain domain; uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ + uint32_t pitch_zmask; /* ZMASK_PITCH */ + uint32_t pitch_hiz; /* HIZ_PITCH */ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ /* Parameters dedicated to the CBZB clear. */ @@ -329,13 +326,9 @@ struct r300_surface { /* Whether the CBZB clear is allowed on the surface. */ boolean cbzb_allowed; - }; struct r300_texture_desc { - /* Parent class. */ - struct u_resource b; - /* Width, height, and depth. * Most of the time, these are equal to pipe_texture::width0, height0, * and depth0. 
However, NPOT 3D textures must have dimensions aligned @@ -387,28 +380,39 @@ struct r300_texture_desc { /* Whether CBZB fast color clear is allowed on the miplevel. */ boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS]; + + /* Zbuffer compression info for each miplevel. */ + boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS]; + /* If zero, then disable Z compression/HiZ. */ + unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS]; + /* Zmask/HiZ strides for each miplevel. */ + unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; }; -struct r300_texture { - struct r300_texture_desc desc; +struct r300_resource +{ + struct u_vbuf_resource b; + /* Winsys buffer backing this resource. */ + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; enum r300_buffer_domain domain; + unsigned buf_size; + + /* Constant buffers are in user memory. */ + uint8_t *constant_buffer; - /* Pipe buffer backing this texture. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + /* Texture description (addressing, layout, special features). */ + struct r300_texture_desc tex; /* Registers carrying texture format data. */ /* Only format-independent bits should be filled in. */ struct r300_texture_format_state tx_format; - /* All bits should be filled in. */ - struct r300_texture_fb_state fb_state; - /* hyper-z memory allocs */ - struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; - struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; - boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS]; - boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; + /* Where the texture starts in the buffer. */ + unsigned tex_offset; /* This is the level tiling flags were last time set for. 
* It's used to prevent redundant tiling-flags changes from happening.*/ @@ -418,33 +422,29 @@ struct r300_texture { struct r300_vertex_element_state { unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; + unsigned format_size[PIPE_MAX_ATTRIBS]; - /* If (velem[i].src_format != hw_format[i]), the vertex buffer - * referenced by this vertex element cannot be used for rendering and - * its vertex data must be translated to hw_format[i]. */ - enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; - unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + struct u_vbuf_mgr_elements *vmgr_elements; /* The size of the vertex, in dwords. */ unsigned vertex_size_dwords; - /* This might mean two things: - * - src_format != hw_format, as discussed above. - * - src_offset % 4 != 0. */ - boolean incompatible_layout; - struct r300_vertex_stream_state vertex_stream; }; -struct r300_translate_context { - /* Translate cache for incompatible vertex offset/stride/format fallback. */ - struct translate_cache *translate_cache; +enum r300_hiz_func { + HIZ_FUNC_NONE, + + /* The function, when determined, is set in stone + * until the next HiZ clear. */ - /* The vertex buffer slot containing the translated buffer. */ - unsigned vb_slot; + /* MAX is written to the HiZ buffer. + * Used for LESS, LEQUAL. */ + HIZ_FUNC_MAX, - /* Saved and new vertex element state. */ - void *saved_velems, *new_velems; + /* MIN is written to the HiZ buffer. + * Used for GREATER, GEQUAL. */ + HIZ_FUNC_MIN, }; struct r300_context { @@ -473,8 +473,6 @@ struct r300_context { struct blitter_context* blitter; /* Stencil two-sided reference value fallback. */ struct r300_stencilref_context *stencilref_fallback; - /* For translating vertex buffers having incompatible vertex layout. */ - struct r300_translate_context tran; /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. 
In order to calm down the CS checker, we bind this @@ -556,13 +554,8 @@ struct r300_context { /* The pointers to the first and the last atom. */ struct r300_atom *first_dirty, *last_dirty; - /* Vertex buffers for Gallium. */ - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - int vertex_buffer_count; - int vertex_buffer_max_index; /* Vertex elements for Gallium. */ struct r300_vertex_element_state *velems; - bool any_user_vbs; struct pipe_index_buffer index_buffer; @@ -587,21 +580,26 @@ struct r300_context { int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). */ boolean two_sided_color; - /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ - boolean incompatible_vb_layout; -#define R300_Z_COMPRESS_44 1 -#define RV350_Z_COMPRESS_88 2 - int z_compression; + /* Whether fast color clear is enabled. */ boolean cbzb_clear; - boolean z_decomp_rd; - - /* two mem block managers for hiz/zmask ram space */ - struct mem_block *hiz_mm; - struct mem_block *zmask_mm; - - /* upload managers */ - struct u_upload_mgr *upload_vb; - struct u_upload_mgr *upload_ib; + /* Whether ZMASK is enabled. */ + boolean zmask_in_use; + /* Whether ZMASK is being decompressed. */ + boolean zmask_decompress; + /* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */ + boolean hyperz_locked; + /* The zbuffer the ZMASK of which is locked. */ + struct pipe_surface *locked_zbuffer; + /* Whether HIZ is enabled. */ + boolean hiz_in_use; + /* HiZ function. Can be either MIN or MAX. */ + enum r300_hiz_func hiz_func; + /* HiZ clear value. */ + uint32_t hiz_clear_value; + + void *dsa_decompress_zmask; + + struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; @@ -611,13 +609,12 @@ struct r300_context { /* const tracking for VS */ int vs_const_base; - /* AOS (PACKET3_3D_LOAD_VBPNTR) command buffer for the case offset=0. 
*/ - uint32_t aos_cb[(16 * 3 + 1) / 2]; - boolean aos_dirty; - - /* Whether any buffer (FB, textures, VBOs) has been set, but buffers - * haven't been validated yet. */ - boolean validate_buffers; + /* Vertex array state info */ + boolean vertex_arrays_dirty; + boolean vertex_arrays_indexed; + int vertex_arrays_offset; + int vertex_arrays_instance_id; + boolean instancing_enabled; }; #define foreach_atom(r300, atom) \ @@ -637,9 +634,9 @@ static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf) return (struct r300_surface*)surf; } -static INLINE struct r300_texture* r300_texture(struct pipe_resource* tex) +static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex) { - return (struct r300_texture*)tex; + return (struct r300_resource*)tex; } static INLINE struct r300_context* r300_context(struct pipe_context* context) @@ -652,12 +649,25 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300) return (struct r300_fragment_shader*)r300->fs.state; } +static INLINE void r300_mark_atom_dirty(struct r300_context *r300, + struct r300_atom *atom) +{ + atom->dirty = TRUE; + + if (!r300->first_dirty) { + r300->first_dirty = atom; + r300->last_dirty = atom+1; + } else { + if (atom < r300->first_dirty) + r300->first_dirty = atom; + else if (atom+1 > r300->last_dirty) + r300->last_dirty = atom+1; + } +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); -void r300_finish(struct r300_context *r300); -void r300_flush_cb(void *data); - /* Context initialization. 
*/ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_blit_functions(struct r300_context *r300); @@ -668,10 +678,17 @@ void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); /* r300_blit.c */ -void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned level, - unsigned layer); +void r300_decompress_zmask(struct r300_context *r300); +void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); +void r300_decompress_zmask_locked(struct r300_context *r300); + +/* r300_flush.c */ +void r300_flush(struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence); + +/* r300_hyperz.c */ +void r300_update_hyperz_state(struct r300_context* r300); /* r300_query.c */ void r300_resume_query(struct r300_context *r300, @@ -679,8 +696,6 @@ void r300_resume_query(struct r300_context *r300, void r300_stop_query(struct r300_context *r300); /* r300_render_translate.c */ -void r300_begin_vertex_translate(struct r300_context *r300); -void r300_end_vertex_translate(struct r300_context *r300); void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, @@ -696,29 +711,16 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, - R300_CHANGED_CBZB_FLAG, - R300_CHANGED_ZCLEAR_FLAG + R300_CHANGED_HYPERZ_FLAG, + R300_CHANGED_MULTIWRITE }; void r300_mark_fb_state_dirty(struct r300_context *r300, enum r300_fb_state_change change); void r300_mark_fs_code_dirty(struct r300_context *r300); -static INLINE void r300_mark_atom_dirty(struct r300_context *r300, - struct r300_atom *atom) -{ - atom->dirty = TRUE; - - if (!r300->first_dirty) { - r300->first_dirty = atom; - r300->last_dirty = atom+1; - } else { - if (atom < r300->first_dirty) - r300->first_dirty = atom; - if (atom+1 > 
r300->last_dirty) - r300->last_dirty = atom+1; - } -} +/* r300_state_derived.c */ +void r300_update_derived_state(struct r300_context* r300); /* r300_debug.c */ void r500_dump_rs_block(struct r300_rs_block *rs); diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 67fb0096a8..2e52dfa43c 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -35,12 +35,6 @@ * that they neatly hide away, and don't have the cost of function setup,so * we're going to use them. */ -#ifdef DEBUG -#define CS_DEBUG(x) x -#else -#define CS_DEBUG(x) -#endif - /** * Command submission setup. */ @@ -50,22 +44,29 @@ struct r300_winsys_screen *cs_winsys = (context)->rws; \ int cs_count = 0; (void) cs_count; (void) cs_winsys; +#ifdef DEBUG + #define BEGIN_CS(size) do { \ assert(size <= (R300_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \ - CS_DEBUG(cs_count = size;) \ + cs_count = size; \ } while (0) -#ifdef DEBUG #define END_CS do { \ if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ cs_count, __FUNCTION__, __FILE__, __LINE__); \ cs_count = 0; \ } while (0) + +#define CS_USED_DW(x) cs_count -= (x) + #else + +#define BEGIN_CS(size) #define END_CS -#endif +#define CS_USED_DW(x) +#endif /** * Writing pure DWORDs. @@ -73,7 +74,7 @@ #define OUT_CS(value) do { \ cs_copy->buf[cs_copy->cdw++] = (value); \ - CS_DEBUG(cs_count--;) \ + CS_USED_DW(1); \ } while (0) #define OUT_CS_32F(value) \ @@ -98,7 +99,7 @@ #define OUT_CS_TABLE(values, count) do { \ memcpy(cs_copy->buf + cs_copy->cdw, values, count * 4); \ cs_copy->cdw += count; \ - CS_DEBUG(cs_count -= count;) \ + CS_USED_DW(count); \ } while (0) @@ -106,27 +107,11 @@ * Writing relocations. 
*/ -#define OUT_CS_RELOC(bo, offset, rd, wd) do { \ - assert(bo); \ - OUT_CS(offset); \ - cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \ - CS_DEBUG(cs_count -= 2;) \ -} while (0) - -#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \ - assert(bo); \ - OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset, rd, wd); \ -} while (0) - -#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \ - assert(tex); \ - OUT_CS_RELOC(tex->cs_buffer, offset, rd, wd); \ -} while (0) - -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \ - assert(bo); \ - cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf, rd, wd); \ - CS_DEBUG(cs_count -= 2;) \ +#define OUT_CS_RELOC(r) do { \ + assert((r)); \ + assert((r)->cs_buf); \ + cs_winsys->cs_write_reloc(cs_copy, (r)->cs_buf); \ + CS_USED_DW(2); \ } while (0) @@ -135,7 +120,7 @@ */ #define WRITE_CS_TABLE(values, count) do { \ - CS_DEBUG(assert(cs_count == 0);) \ + assert(cs_count == 0); \ memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \ cs_copy->cdw += (count); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 52031dd97b..b60cfd1f24 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -27,6 +27,7 @@ #include <stdio.h> static const struct debug_named_value debug_options[] = { + { "info", DBG_INFO, "Print hardware info"}, { "fp", DBG_FP, "Log fragment program compilation" }, { "vp", DBG_VP, "Log vertex program compilation" }, { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" }, @@ -41,6 +42,7 @@ static const struct debug_named_value debug_options[] = { { "fb", DBG_FB, "Log framebuffer" }, { "cbzb", DBG_CBZB, "Log fast color clear info" }, { "hyperz", DBG_HYPERZ, "Log HyperZ info" }, + { "upload", DBG_UPLOAD, "Log user buffer upload info" }, { "scissor", DBG_SCISSOR, "Log scissor info" }, { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" }, { "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" }, @@ 
-48,6 +50,8 @@ static const struct debug_named_value debug_options[] = { { "noimmd", DBG_NO_IMMD, "Disable immediate mode" }, { "noopt", DBG_NO_OPT, "Disable shader optimizations" }, { "nocbzb", DBG_NO_CBZB, "Disable fast color clear" }, + { "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" }, + { "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 04a5bd92d1..e17a907e77 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -26,7 +26,6 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_mm.h" -#include "util/u_simple_list.h" #include "r300_context.h" #include "r300_cb.h" @@ -46,7 +45,10 @@ void r300_emit_blend_state(struct r300_context* r300, CS_LOCALS(r300); if (fb->nr_cbufs) { - WRITE_CS_TABLE(blend->cb, size); + if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) + WRITE_CS_TABLE(blend->cb_noclamp, size); + else + WRITE_CS_TABLE(blend->cb_clamp, size); } else { WRITE_CS_TABLE(blend->cb_no_readwrite, size); } @@ -78,9 +80,15 @@ void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) CS_LOCALS(r300); if (fb->zsbuf) { - WRITE_CS_TABLE(&dsa->cb_begin, size); + if (fb->nr_cbufs && fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) + WRITE_CS_TABLE(&dsa->cb_begin_fp16, size); + else + WRITE_CS_TABLE(&dsa->cb_begin, size); } else { - WRITE_CS_TABLE(dsa->cb_no_readwrite, size); + if (fb->nr_cbufs && fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) + WRITE_CS_TABLE(dsa->cb_fp16_zb_no_readwrite, size); + else + WRITE_CS_TABLE(dsa->cb_zb_no_readwrite, size); } } @@ -90,7 +98,7 @@ static void get_rc_constant_state( struct rc_constant * constant) { struct r300_textures_state* texstate = r300->textures_state.state; - struct r300_texture *tex; + struct r300_resource *tex; assert(constant->Type == RC_CONSTANT_STATE); @@ -102,19 
+110,19 @@ static void get_rc_constant_state( /* Factor for converting rectangle coords to * normalized coords. Should only show up on non-r500. */ case RC_STATE_R300_TEXRECT_FACTOR: - tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); - vec[0] = 1.0 / tex->desc.width0; - vec[1] = 1.0 / tex->desc.height0; + tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); + vec[0] = 1.0 / tex->tex.width0; + vec[1] = 1.0 / tex->tex.height0; vec[2] = 0; vec[3] = 1; break; case RC_STATE_R300_TEXSCALE_FACTOR: - tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); + tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); /* Add a small number to the texture size to work around rounding errors in hw. */ - vec[0] = tex->desc.b.b.width0 / (tex->desc.width0 + 0.001f); - vec[1] = tex->desc.b.b.height0 / (tex->desc.height0 + 0.001f); - vec[2] = tex->desc.b.b.depth0 / (tex->desc.depth0 + 0.001f); + vec[0] = tex->b.b.b.width0 / (tex->tex.width0 + 0.001f); + vec[1] = tex->b.b.b.height0 / (tex->tex.height0 + 0.001f); + vec[2] = tex->b.b.b.depth0 / (tex->tex.depth0 + 0.001f); vec[3] = 1; break; @@ -353,11 +361,9 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); if (aa->dest) { - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset, 0, aa->dest->domain); - - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch, 0, aa->dest->domain); + OUT_CS_REG(R300_RB3D_AARESOLVE_OFFSET, aa->dest->offset); + OUT_CS_RELOC(aa->dest); + OUT_CS_REG(R300_RB3D_AARESOLVE_PITCH, aa->dest->pitch); } OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); @@ -370,6 +376,8 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct r300_surface* surf; unsigned i; boolean can_hyperz = 
r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + uint32_t rb3d_cctl = 0; + CS_LOCALS(r300); BEGIN_CS(size); @@ -377,21 +385,24 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not * what we usually want. */ if (r300->screen->caps.is_r500) { - OUT_CS_REG(R300_RB3D_CCTL, - R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE); - } else { - OUT_CS_REG(R300_RB3D_CCTL, 0); + rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE; } + if (fb->nr_cbufs && + r300_fragment_shader_writes_all(r300_fs(r300))) { + rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs); + } + + OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl); /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { surf = r300_surface(fb->cbufs[i]); - OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset, 0, surf->domain); + OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch, 0, surf->domain); + OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch); + OUT_CS_RELOC(surf); } /* Set up the ZB part of the CBZB clear. 
*/ @@ -400,11 +411,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); - OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset, 0, surf->domain); + OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch, 0, surf->domain); + OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch); + OUT_CS_RELOC(surf); DBG(r300, DBG_CBZB, "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, @@ -416,37 +427,19 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->format); - OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset, 0, surf->domain); + OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch, 0, surf->domain); + OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch); + OUT_CS_RELOC(surf); if (can_hyperz) { - uint32_t surf_pitch; - struct r300_texture *tex; - int level = surf->base.u.tex.level; - tex = r300_texture(surf->base.texture); - - surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; /* HiZ RAM. */ - if (r300->screen->caps.hiz_ram) { - if (tex->hiz_mem[level]) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); - } - } + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz); /* Z Mask RAM. 
(compressed zbuffer) */ - if (tex->zmask_mem[level]) { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); - } + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask); } } @@ -458,6 +451,7 @@ void r300_emit_hyperz_state(struct r300_context *r300, { struct r300_hyperz_state *z = state; CS_LOCALS(r300); + if (z->flush) WRITE_CS_TABLE(&z->cb_flush_begin, size); else @@ -483,15 +477,22 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - unsigned i; + unsigned i, num_cbufs = fb->nr_cbufs; + unsigned mspos0, mspos1; CS_LOCALS(r300); + /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be + * marked as UNUSED in the US block. */ + if (r300_fragment_shader_writes_all(r300_fs(r300))) { + num_cbufs = MIN2(num_cbufs, 1); + } + BEGIN_CS(size); /* Colorbuffer format in the US block. * (must be written after unpipelined regs) */ OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); - for (i = 0; i < fb->nr_cbufs; i++) { + for (i = 0; i < num_cbufs; i++) { OUT_CS(r300_surface(fb->cbufs[i])->format); } for (; i < 4; i++) { @@ -501,38 +502,36 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, /* Multisampling. Depends on framebuffer sample count. * These are pipelined regs and as such cannot be moved * to the AA state. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - unsigned mspos0 = 0x66666666; - unsigned mspos1 = 0x6666666; - - if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { - /* Subsample placement. These may not be optimal. 
*/ - switch (fb->cbufs[0]->texture->nr_samples) { - case 2: - mspos0 = 0x33996633; - mspos1 = 0x6666663; - break; - case 3: - mspos0 = 0x33936933; - mspos1 = 0x6666663; - break; - case 4: - mspos0 = 0x33939933; - mspos1 = 0x3966663; - break; - case 6: - mspos0 = 0x22a2aa22; - mspos1 = 0x2a65672; - break; - default: - debug_printf("r300: Bad number of multisamples!\n"); - } - } + mspos0 = 0x66666666; + mspos1 = 0x6666666; - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(mspos0); - OUT_CS(mspos1); + if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { + /* Subsample placement. These may not be optimal. */ + switch (fb->cbufs[0]->texture->nr_samples) { + case 2: + mspos0 = 0x33996633; + mspos1 = 0x6666663; + break; + case 3: + mspos0 = 0x33936933; + mspos1 = 0x6666663; + break; + case 4: + mspos0 = 0x33939933; + mspos1 = 0x3966663; + break; + case 6: + mspos0 = 0x22a2aa22; + mspos1 = 0x2a65672; + break; + default: + debug_printf("r300: Bad number of multisamples!\n"); + } } + + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(mspos0); + OUT_CS(mspos1); END_CS; } @@ -553,14 +552,12 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state) OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; query->begin_emitted = TRUE; - query->flushed = FALSE; } static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); assert(caps->num_frag_pipes); @@ -578,29 +575,25 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, case 4: /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 3) * 4, - 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 3) * 4); + OUT_CS_RELOC(r300->query_current); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); - 
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 2) * 4, - 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 2) * 4); + OUT_CS_RELOC(r300->query_current); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, - 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); + OUT_CS_RELOC(r300->query_current); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, - 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4); + OUT_CS_RELOC(r300->query_current); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -616,13 +609,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, static void rv530_emit_query_end_single_z(struct r300_context *r300, struct r300_query *query) { - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, query->num_results * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -630,16 +622,15 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, static void rv530_emit_query_end_double_z(struct r300_context *r300, struct r300_query *query) { - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, 
(query->num_results + 0) * 4, 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -778,7 +769,7 @@ void r300_emit_textures_state(struct r300_context *r300, { struct r300_textures_state *allstate = (struct r300_textures_state*)state; struct r300_texture_sampler_state *texstate; - struct r300_texture *tex; + struct r300_resource *tex; unsigned i; CS_LOCALS(r300); @@ -788,7 +779,7 @@ void r300_emit_textures_state(struct r300_context *r300, for (i = 0; i < allstate->count; i++) { if ((1 << i) & allstate->tx_enable) { texstate = &allstate->regs[i]; - tex = r300_texture(allstate->sampler_views[i]->base.texture); + tex = r300_resource(allstate->sampler_views[i]->base.texture); OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0); OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1); @@ -799,73 +790,35 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2); - OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain, - 0); + OUT_CS_REG(R300_TX_OFFSET_0 + (i * 4), texstate->format.tile_config); + OUT_CS_RELOC(tex); } } END_CS; } -static void r300_update_aos_cb(struct r300_context *r300, unsigned packet_size) +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, + boolean indexed, int instance_id) { - struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; + struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer; + 
struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; - unsigned *hw_format_size = r300->velems->hw_format_size; - unsigned size1, size2, aos_count = r300->velems->count; + struct r300_resource *buf; int i; - CB_LOCALS; - - BEGIN_CB(r300->aos_cb, packet_size); - for (i = 0; i < aos_count - 1; i += 2) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - vb2 = &vbuf[velem[i+1].vertex_buffer_index]; - size1 = hw_format_size[i]; - size2 = hw_format_size[i+1]; - - OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | - R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CB(vb1->buffer_offset + velem[i].src_offset); - OUT_CB(vb2->buffer_offset + velem[i+1].src_offset); - } - - if (aos_count & 1) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - size1 = hw_format_size[i]; - - OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CB(vb1->buffer_offset + velem[i].src_offset); - } - END_CB; - - r300->aos_dirty = FALSE; -} - -void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) -{ - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->velems->velem; - struct r300_buffer *buf; - int i; - unsigned aos_count = r300->velems->count; - unsigned packet_size = (aos_count * 3 + 1) / 2; + unsigned vertex_array_count = r300->velems->count; + unsigned packet_size = (vertex_array_count * 3 + 1) / 2; + struct pipe_vertex_buffer *vb1, *vb2; + unsigned *hw_format_size = r300->velems->format_size; + unsigned size1, size2, offset1, offset2, stride1, stride2; CS_LOCALS(r300); - BEGIN_CS(2 + packet_size + aos_count * 2); + BEGIN_CS(2 + packet_size + vertex_array_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(aos_count | (!indexed ? 
R300_VC_FORCE_PREFETCH : 0)); - - if (!offset) { - if (r300->aos_dirty) { - r300_update_aos_cb(r300, packet_size); - } - OUT_CS_TABLE(r300->aos_cb, packet_size); - } else { - struct pipe_vertex_buffer *vb1, *vb2; - unsigned *hw_format_size = r300->velems->hw_format_size; - unsigned size1, size2; + OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); - for (i = 0; i < aos_count - 1; i += 2) { + if (instance_id == -1) { + /* Non-instanced arrays. This ignores instance_divisor and instance_id. */ + for (i = 0; i < vertex_array_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; vb2 = &vbuf[velem[i+1].vertex_buffer_index]; size1 = hw_format_size[i]; @@ -877,23 +830,75 @@ void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); } - if (aos_count & 1) { + if (vertex_array_count & 1) { vb1 = &vbuf[velem[i].vertex_buffer_index]; size1 = hw_format_size[i]; OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); } - } - for (i = 0; i < aos_count; i++) { - buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0); + for (i = 0; i < vertex_array_count; i++) { + buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + OUT_CS_RELOC(buf); + } + } else { + /* Instanced arrays. 
*/ + for (i = 0; i < vertex_array_count - 1; i += 2) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = hw_format_size[i]; + size2 = hw_format_size[i+1]; + + if (velem[i].instance_divisor) { + stride1 = 0; + offset1 = vb1->buffer_offset + velem[i].src_offset + + (instance_id / velem[i].instance_divisor) * vb1->stride; + } else { + stride1 = vb1->stride; + offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; + } + if (velem[i+1].instance_divisor) { + stride2 = 0; + offset2 = vb2->buffer_offset + velem[i+1].src_offset + + (instance_id / velem[i+1].instance_divisor) * vb2->stride; + } else { + stride2 = vb2->stride; + offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride; + } + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2)); + OUT_CS(offset1); + OUT_CS(offset2); + } + + if (vertex_array_count & 1) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = hw_format_size[i]; + + if (velem[i].instance_divisor) { + stride1 = 0; + offset1 = vb1->buffer_offset + velem[i].src_offset + + (instance_id / velem[i].instance_divisor) * vb1->stride; + } else { + stride1 = vb1->stride; + offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; + } + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1)); + OUT_CS(offset1); + } + + for (i = 0; i < vertex_array_count; i++) { + buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + OUT_CS_RELOC(buf); + } } END_CS; } -void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) +void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) { CS_LOCALS(r300); @@ -913,7 +918,8 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->draw_vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 
0); + OUT_CS(0); + OUT_CS_RELOC(r300_resource(r300->vbo)); END_CS; } @@ -1080,109 +1086,47 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } -static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - -static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_hyperz_state *z = - (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0, height, offset_shift; - struct r300_texture* tex; - int i; - - tex = r300_texture(fb->zsbuf->texture); - - offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs; - stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; - - /* convert from pixels to 4x4 blocks */ - stride = ALIGN_DIVUP(stride, 4); - - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); - /* there are 4 blocks per dwords */ - stride = ALIGN_DIVUP(stride, 4); - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); + struct r300_resource* tex; + CS_LOCALS(r300); - offset_shift = 2; - offset_shift += (r300screen->caps.num_frag_pipes / 2); + tex = r300_resource(fb->zsbuf->texture); - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); - } - z->current_func = -1; + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(0); + 
OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]); + OUT_CS(r300->hiz_clear_value); + END_CS; /* Mark the current zbuffer's hiz ram as in use. */ - tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->hiz_in_use = TRUE; + r300->hiz_func = HIZ_FUNC_NONE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0; - struct r300_texture* tex; - uint32_t i, height; - int mult, offset_shift; - - tex = r300_texture(fb->zsbuf->texture); - stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; - - offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs; - - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; - - height = ALIGN_DIVUP(fb->zsbuf->height, mult); - - offset_shift = 4; - offset_shift += (r300screen->caps.num_frag_pipes / 2); - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + struct r300_resource *tex; + CS_LOCALS(r300); - /* okay have width in pixels - divide by block width */ - stride = ALIGN_DIVUP(stride, mult); - /* have width in blocks - divide by number of fragment pipes screen width */ - /* 16 blocks per dword */ - stride = ALIGN_DIVUP(stride, 16); + tex = r300_resource(fb->zsbuf->texture); - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); - } + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(0); + OUT_CS(tex->tex.zmask_dwords[fb->zsbuf->u.tex.level]); + OUT_CS(0); + END_CS; /* Mark the current zbuffer's zmask as in use. 
*/ - tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->zmask_in_use = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_ztop_state(struct r300_context* r300, @@ -1209,68 +1153,77 @@ boolean r300_emit_buffer_validate(struct r300_context *r300, boolean do_validate_vertex_buffers, struct pipe_resource *index_buffer) { - struct pipe_framebuffer_state* fb = + struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_textures_state *texstate = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture* tex; - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->velems->velem; - struct pipe_resource *pbuf; + struct r300_resource *tex; unsigned i; - - /* Clean out BOs. */ - r300->rws->cs_reset_buffers(r300->cs); - - /* Color buffers... */ - for (i = 0; i < fb->nr_cbufs; i++) { - tex = r300_texture(fb->cbufs[i]->texture); - assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, 0, - r300_surface(fb->cbufs[i])->domain); - } - /* ...depth buffer... */ - if (fb->zsbuf) { - tex = r300_texture(fb->zsbuf->texture); - assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, 0, - r300_surface(fb->zsbuf)->domain); - } - /* ...textures... */ - for (i = 0; i < texstate->count; i++) { - if (!(texstate->tx_enable & (1 << i))) { - continue; + boolean flushed = FALSE; + +validate: + if (r300->fb_state.dirty) { + /* Color buffers... */ + for (i = 0; i < fb->nr_cbufs; i++) { + tex = r300_resource(fb->cbufs[i]->texture); + assert(tex && tex->buf && "cbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, + r300_surface(fb->cbufs[i])->domain); + } + /* ...depth buffer... 
*/ + if (fb->zsbuf) { + tex = r300_resource(fb->zsbuf->texture); + assert(tex && tex->buf && "zsbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, + r300_surface(fb->zsbuf)->domain); } + } + if (r300->textures_state.dirty) { + /* ...textures... */ + for (i = 0; i < texstate->count; i++) { + if (!(texstate->tx_enable & (1 << i))) { + continue; + } - tex = r300_texture(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, tex->domain, 0); + tex = r300_resource(texstate->sampler_views[i]->base.texture); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, tex->domain, 0); + } } /* ...occlusion query buffer... */ if (r300->query_current) - r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buffer, - 0, r300->query_current->domain); + r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf, + 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) - r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->cs_buf, - r300_buffer(r300->vbo)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf, + r300_resource(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... */ - if (do_validate_vertex_buffers) { - for (i = 0; i < r300->velems->count; i++) { - pbuf = vbuf[velem[i].vertex_buffer_index].buffer; - if (!pbuf) + if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) { + struct pipe_resource **buf = r300->vbuf_mgr->real_vertex_buffer; + struct pipe_resource **last = r300->vbuf_mgr->real_vertex_buffer + + r300->vbuf_mgr->nr_real_vertex_buffers; + for (; buf != last; buf++) { + if (!*buf) continue; - r300->rws->cs_add_buffer(r300->cs, r300_buffer(pbuf)->cs_buf, - r300_buffer(pbuf)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf, + r300_resource(*buf)->domain, 0); } } /* ...and index buffer for HWTCL path. 
*/ if (index_buffer) - r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->cs_buf, - r300_buffer(index_buffer)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, + r300_resource(index_buffer)->domain, 0); + /* Now do the validation. */ if (!r300->rws->cs_validate(r300->cs)) { - return FALSE; + /* Ooops, an infinite loop, give up. */ + if (flushed) + return FALSE; + + r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); + flushed = TRUE; + goto validate; } return TRUE; @@ -1300,7 +1253,7 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) /* Emitted in flush. */ dwords += 26; /* emit_query_end */ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) dwords += 2; return dwords; diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 278dbcb4c7..6c1c9d2fb1 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,7 +31,8 @@ struct r300_vertex_program_code; uint32_t pack_float24(float f); -void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed); +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, + boolean indexed, int instance_id); void r300_emit_blend_state(struct r300_context* r300, unsigned size, void* state); @@ -86,7 +87,7 @@ void r300_emit_scissor_state(struct r300_context* r300, void r300_emit_textures_state(struct r300_context *r300, unsigned size, void *state); -void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed); +void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed); void r300_emit_vap_invariant_state(struct r300_context *r300, unsigned size, void *state); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 451fe525b4..b3d0d344ec 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ 
b/src/gallium/drivers/r300/r300_flush.c @@ -31,29 +31,38 @@ #include "r300_cs.h" #include "r300_emit.h" -static void r300_flush(struct pipe_context* pipe, - unsigned flags, - struct pipe_fence_handle** fence) + +void r300_flush(struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence) { struct r300_context *r300 = r300_context(pipe); - struct r300_query *query; struct r300_atom *atom; - struct r300_fence **rfence = (struct r300_fence**)fence; - - u_upload_flush(r300->upload_vb); - u_upload_flush(r300->upload_ib); + struct r300_winsys_bo **rfence = (struct r300_winsys_bo**)fence; if (r300->draw && !r300->draw_vbo_locked) r300_draw_flush_vbuf(r300); + if (rfence) { + /* Create a fence, which is a dummy BO. */ + *rfence = r300->rws->buffer_create(r300->rws, 1, 1, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + R300_DOMAIN_GTT); + /* Add the fence as a dummy relocation. */ + r300->rws->cs_add_reloc(r300->cs, + r300->rws->buffer_get_cs_handle(*rfence), + R300_DOMAIN_GTT, R300_DOMAIN_GTT); + } + if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) r500_emit_index_bias(r300, 0); r300->flush_counter++; - r300->rws->cs_flush(r300->cs); + r300->rws->cs_flush(r300->cs, flags); r300->dirty_hw = 0; /* New kitchen sink, baby. */ @@ -62,30 +71,35 @@ static void r300_flush(struct pipe_context* pipe, r300_mark_atom_dirty(r300, atom); } } + r300->vertex_arrays_dirty = TRUE; /* Unmark HWTCL state for SWTCL. */ if (!r300->screen->caps.has_tcl) { r300->vs_state.dirty = FALSE; r300->vs_constants.dirty = FALSE; } - - r300->validate_buffers = TRUE; - } - - /* reset flushed query */ - foreach(query, &r300->query_list) { - query->flushed = TRUE; + } else { + if (rfence) { + /* We have to create a fence object, but the command stream is empty + * and we cannot emit an empty CS. We must write some regs then. 
*/ + CS_LOCALS(r300); + OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0); + r300->rws->cs_flush(r300->cs, flags); + } else { + /* Even if hw is not dirty, we should at least reset the CS in case + * the space checking failed for the first draw operation. */ + r300->rws->cs_flush(r300->cs, flags); + } } +} - /* Create a new fence. */ - if (rfence) { - *rfence = CALLOC_STRUCT(r300_fence); - pipe_reference_init(&(*rfence)->reference, 1); - (*rfence)->ctx = r300; - } +static void r300_flush_wrapped(struct pipe_context *pipe, + struct pipe_fence_handle **fence) +{ + r300_flush(pipe, 0, fence); } void r300_init_flush_functions(struct r300_context* r300) { - r300->context.flush = r300_flush; + r300->context.flush = r300_flush_wrapped; } diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 2936c3486e..4c502fefb3 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -149,16 +149,18 @@ static void get_external_state( unsigned i; unsigned char *swizzle; + state->frag_clamp = 0; + for (i = 0; i < texstate->sampler_state_count; i++) { struct r300_sampler_state *s = texstate->sampler_states[i]; struct r300_sampler_view *v = texstate->sampler_views[i]; - struct r300_texture *t; + struct r300_resource *t; if (!s || !v) { continue; } - t = r300_texture(texstate->sampler_views[i]->base.texture); + t = r300_resource(texstate->sampler_views[i]->base.texture); if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { state->unit[i].compare_mode_enabled = 1; @@ -181,7 +183,7 @@ static void get_external_state( state->unit[i].non_normalized_coords = !s->state.normalized_coords; /* XXX this should probably take into account STR, not just S. 
*/ - if (t->desc.is_npot) { + if (t->tex.is_npot) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; @@ -201,7 +203,7 @@ static void get_external_state( state->unit[i].wrap_mode = RC_WRAP_NONE; } - if (t->desc.b.b.target == PIPE_TEXTURE_3D) + if (t->b.b.b.target == PIPE_TEXTURE_3D) state->unit[i].clamp_and_scale_before_fetch = TRUE; } } @@ -298,44 +300,98 @@ static void r300_emit_fs_code_to_buffer( } } else { /* r300 */ struct r300_fragment_program_code *code = &generic_code->code.r300; - - shader->cb_code_size = 19 + - (r300->screen->caps.is_r400 ? 2 : 0) + - code->alu.length * 4 + - (code->tex.length ? (1 + code->tex.length) : 0) + - imm_count * 5; + unsigned int alu_length = code->alu.length; + unsigned int alu_iterations = ((alu_length - 1) / 64) + 1; + unsigned int tex_length = code->tex.length; + unsigned int tex_iterations = + tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0; + unsigned int iterations = + alu_iterations > tex_iterations ? alu_iterations : tex_iterations; + unsigned int bank = 0; + + shader->cb_code_size = 15 + + /* R400_US_CODE_BANK */ + (r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) + + /* R400_US_CODE_EXT */ + (r300->screen->caps.is_r400 ? 2 : 0) + + /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */ + (code->r390_mode ? (5 * alu_iterations) : 4) + + /* R400_US_ALU_EXT_ADDR_[0-63] */ + (code->r390_mode ? (code->alu.length) : 0) + + /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */ + code->alu.length * 4 + + /* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */ + (code->tex.length > 0 ? 
code->tex.length + tex_iterations : 0) + + imm_count * 5; NEW_CB(shader->cb_code, shader->cb_code_size); - if (r300->screen->caps.is_r400) - OUT_CB_REG(R400_US_CODE_BANK, 0); - OUT_CB_REG(R300_US_CONFIG, code->config); OUT_CB_REG(R300_US_PIXSIZE, code->pixsize); OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset); + if (code->r390_mode) { + OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext); + } else if (r300->screen->caps.is_r400) { + /* This register appears to affect shaders even if r390_mode is + * disabled, so it needs to be set to 0 for shaders that + * don't use r390_mode. */ + OUT_CB_REG(R400_US_CODE_EXT, 0); + } + OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4); OUT_CB_TABLE(code->code_addr, 4); - OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].rgb_inst); + do { + unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64); + unsigned int bank_alu_offset = bank * 64; + unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32); + unsigned int bank_tex_offset = bank * 32; + + if (r300->screen->caps.is_r400) { + OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ? 
+ (bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2 + } + + if (bank_alu_length > 0) { + OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst); + + OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr); + + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst); - OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].rgb_addr); + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr); - OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].alpha_inst); + if (code->r390_mode) { + OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr); + } + } + + if (bank_tex_length > 0) { + OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length); + OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length); + } - OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].alpha_addr); + alu_length -= bank_alu_length; + tex_length -= bank_tex_length; + bank++; + } while(code->r390_mode && (alu_length > 0 || tex_length > 0)); - if (code->tex.length) { - OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); - OUT_CB_TABLE(code->tex.inst, code->tex.length); + /* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders + * will be rendered incorrectly. */ + if (r300->screen->caps.is_r400) { + OUT_CB_REG(R400_US_CODE_BANK, + code->r390_mode ? 
R400_R390_MODE_ENABLE : 0); } /* Emit immediates. */ @@ -384,17 +440,29 @@ static void r300_translate_fragment_shader( compiler.code = &shader->code; compiler.state = shader->compare_state; compiler.Base.is_r500 = r300->screen->caps.is_r500; + compiler.Base.is_r400 = r300->screen->caps.is_r400; compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT); compiler.Base.has_half_swizzles = TRUE; compiler.Base.has_presub = TRUE; - compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32; + compiler.Base.max_temp_regs = + compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32); compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; - compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; + compiler.Base.max_alu_insts = + (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64; + compiler.Base.max_tex_insts = + (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32; compiler.AllocateHwInputs = &allocate_hardware_inputs; compiler.UserData = &shader->inputs; find_output_registers(&compiler, shader); + shader->write_all = FALSE; + for (i = 0; i < shader->info.num_properties; i++) { + if (shader->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { + shader->write_all = TRUE; + } + } + if (compiler.Base.Debug & RC_DBG_LOG) { DBG(r300, DBG_FP, "r300: Initial fragment program\n"); tgsi_dump(tokens, 0); @@ -407,6 +475,13 @@ static void r300_translate_fragment_shader( r300_tgsi_to_rc(&ttr, tokens); + if (ttr.error) { + fprintf(stderr, "r300 FP: Cannot translate a shader. 
" + "Using a dummy shader instead.\n"); + r300_dummy_fragment_shader(r300, shader); + return; + } + if (!r300->screen->caps.is_r500 || compiler.Base.Program.Constants.Count > 200) { compiler.Base.remove_unused_constants = TRUE; diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 51bfa88c5e..c86a90b85a 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -54,6 +54,9 @@ struct r300_fragment_shader_code { uint32_t *cb_code; struct r300_fragment_shader_code* next; + + boolean write_all; + }; struct r300_fragment_shader { @@ -81,4 +84,10 @@ static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_sha return (fs->shader->code.writes_depth) ? TRUE : FALSE; } +static INLINE boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs) +{ + if (!fs) + return FALSE; + return (fs->shader->write_all) ? TRUE : FALSE; +} #endif /* R300_FS_H */ diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index c22e307c67..ecaadf4af8 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -22,7 +22,6 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "r300_context.h" -#include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" #include "r300_winsys.h" @@ -41,58 +40,74 @@ /* The HyperZ setup */ /*****************************************************************************/ -static bool r300_get_sc_hz_max(struct r300_context *r300) +static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - int ret = R300_SC_HYPERZ_MIN; + struct r300_dsa_state *dsa = r300->dsa_state.state; - if (func >= R300_ZS_GEQUAL && func <= R300_ZS_ALWAYS) - ret = R300_SC_HYPERZ_MAX; - return ret; + if (!dsa->dsa.depth.enabled || !dsa->dsa.depth.writemask) + return HIZ_FUNC_NONE; + + switch (dsa->dsa.depth.func) { + case PIPE_FUNC_NEVER: + case PIPE_FUNC_EQUAL: + case PIPE_FUNC_NOTEQUAL: + case PIPE_FUNC_ALWAYS: + return HIZ_FUNC_NONE; + + case PIPE_FUNC_LESS: + case PIPE_FUNC_LEQUAL: + return HIZ_FUNC_MAX; + + case PIPE_FUNC_GREATER: + case PIPE_FUNC_GEQUAL: + return HIZ_FUNC_MIN; + + default: + assert(0); + return HIZ_FUNC_NONE; + } } -static bool r300_zfunc_same_direction(int func1, int func2) +/* Return what's used for the depth test (either minimum or maximum). */ +static unsigned r300_get_sc_hz_max(struct r300_context *r300) { - /* func1 is less/lessthan */ - if ((func1 == R300_ZS_LESS || func1 == R300_ZS_LEQUAL) && - (func2 == R300_ZS_EQUAL || func2 == R300_ZS_GEQUAL || - func2 == R300_ZS_GREATER)) - return FALSE; - - /* func1 is greater/greaterthan */ - if ((func1 == R300_ZS_GEQUAL || func1 == R300_ZS_GREATER) && - (func2 == R300_ZS_LESS || func2 == R300_ZS_LEQUAL)) - return FALSE; + struct r300_dsa_state *dsa = r300->dsa_state.state; + unsigned func = dsa->dsa.depth.func; - return TRUE; + return func >= PIPE_FUNC_GREATER ? 
R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN; } -static int r300_get_hiz_min(struct r300_context *r300) +static boolean r300_is_hiz_func_valid(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - int ret = R300_HIZ_MIN; + struct r300_dsa_state *dsa = r300->dsa_state.state; + unsigned func = dsa->dsa.depth.func; - if (func == R300_ZS_LESS || func == R300_ZS_LEQUAL) - ret = R300_HIZ_MAX; - return ret; + if (r300->hiz_func == HIZ_FUNC_NONE) + return TRUE; + + /* func1 is less/lessthan */ + if (r300->hiz_func == HIZ_FUNC_MAX && + (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER)) + return FALSE; + + /* func1 is greater/greaterthan */ + if (r300->hiz_func == HIZ_FUNC_MIN && + (func == PIPE_FUNC_LESS || func == PIPE_FUNC_LEQUAL)) + return FALSE; + + return TRUE; } static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) { - if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || - s->zfail_op != PIPE_STENCIL_OP_KEEP)) - return TRUE; - return FALSE; + return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP); } static boolean r300_can_hiz(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa; - struct r300_screen* r300screen = r300->screen; - struct r300_hyperz_state *z = r300->hyperz_state.state; + struct r300_dsa_state *dsa = r300->dsa_state.state; + struct r300_screen *r300screen = r300->screen; /* shader writes depth - no HiZ */ if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ @@ -100,34 +115,21 @@ static boolean r300_can_hiz(struct r300_context *r300) if (r300->query_current) return FALSE; + /* if stencil fail/zfail op is not KEEP */ - if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || - r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) + if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) || + 
r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1])) return FALSE; - if (dsa->depth.enabled) { + if (dsa->dsa.depth.enabled) { /* if depth func is EQUAL pre-r500 */ - if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) + if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) return FALSE; + /* if depth func is NOTEQUAL */ - if (dsa->depth.func == PIPE_FUNC_NOTEQUAL) + if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL) return FALSE; } - /* depth comparison function - if just cleared save and return okay */ - if (z->current_func == -1) { - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - if (func != 0 && func != 7) - z->current_func = dsa_state->z_stencil_control & R300_ZS_MASK; - } else { - /* simple don't change */ - if (!r300_zfunc_same_direction(z->current_func, - (dsa_state->z_stencil_control & R300_ZS_MASK))) { - DBG(r300, DBG_HYPERZ, - "z func changed direction - disabling hyper-z %d -> %d\n", - z->current_func, dsa_state->z_stencil_control); - return FALSE; - } - } return TRUE; } @@ -137,10 +139,8 @@ static void r300_update_hyperz(struct r300_context* r300) (struct r300_hyperz_state*)r300->hyperz_state.state; struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_texture *zstex = - fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; - boolean zmask_in_use = FALSE; - boolean hiz_in_use = FALSE; + struct r300_resource *zstex = + fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -152,48 +152,54 @@ static void r300_update_hyperz(struct r300_context* r300) return; } - if (!zstex) - return; - - if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + if (!zstex || + !r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level]; - hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level]; - - /* Z fastfill. 
*/ - if (zmask_in_use) { - z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ - } - /* Zbuffer compression. */ - if (zmask_in_use && r300->z_compression) { - z->zb_bw_cntl |= R300_RD_COMP_ENABLE; - if (r300->z_decomp_rd == false) + if (r300->zmask_in_use && !r300->hyperz_locked) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | + /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/ + R300_RD_COMP_ENABLE; + + if (!r300->zmask_decompress) { z->zb_bw_cntl |= R300_WR_COMP_ENABLE; + } } - /* RV350 and up optimizations. */ - /* The section 10.4.9 in the docs is a lie. */ - if (r300->z_compression == RV350_Z_COMPRESS_88) + + if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) { z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; + } + + /* HiZ. */ + if (r300->hiz_in_use && !r300->hyperz_locked) { + /* Set the HiZ function if needed. */ + if (r300->hiz_func == HIZ_FUNC_NONE) { + r300->hiz_func = r300_get_hiz_func(r300); + } - if (hiz_in_use) { - bool can_hiz = r300_can_hiz(r300); - if (can_hiz) { - z->zb_bw_cntl |= R300_HIZ_ENABLE; - z->sc_hyperz |= R300_SC_HYPERZ_ENABLE; - z->sc_hyperz |= r300_get_sc_hz_max(r300); - z->zb_bw_cntl |= r300_get_hiz_min(r300); + /* If the depth function is inverted, HiZ must be disabled. */ + if (!r300_is_hiz_func_valid(r300)) { + r300->hiz_in_use = FALSE; + } else if (r300_can_hiz(r300)) { + /* Setup the HiZ bits. */ + z->zb_bw_cntl |= + R300_HIZ_ENABLE | + (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX); + + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | + r300_get_sc_hz_max(r300); + + if (r300->screen->caps.is_r500) { + z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE; + } } } /* R500-specific features and optimizations. 
*/ if (r300->screen->caps.is_r500) { - z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; - z->zb_bw_cntl |= - R500_HIZ_EQUAL_REJECT_ENABLE | - R500_PEQ_PACKING_ENABLE | - R500_COVERED_PTR_MASKING_ENABLE; + z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE | + R500_COVERED_PTR_MASKING_ENABLE; } } @@ -285,135 +291,11 @@ static void r300_update_ztop(struct r300_context* r300) r300_mark_atom_dirty(r300, &r300->ztop_state); } -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - -static void r300_update_hiz_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); - r300->hiz_clear.size = height * 4; -} - -static void r300_update_zmask_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - int mult; - - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; - - height = ALIGN_DIVUP(fb->zsbuf->height, mult); - - r300->zmask_clear.size = height * 4; -} - void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); + if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } - - if (r300->hiz_clear.dirty) { - r300_update_hiz_clear(r300); - } - if (r300->zmask_clear.dirty) { - r300_update_zmask_clear(r300); - } -} - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) -{ - struct r300_texture *tex; - uint32_t zsize, ndw; - int level = surf->base.u.tex.level; - - tex = r300_texture(surf->base.texture); - - if (tex->hiz_mem[level]) - return; - - zsize = tex->desc.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->desc.b.b.format); - ndw = ALIGN_DIVUP(zsize, 64); - - tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); - return; -} - -void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress) -{ - int bsize = 256; - uint32_t 
zsize, ndw; - int level = surf->base.u.tex.level; - struct r300_texture *tex; - - tex = r300_texture(surf->base.texture); - - /* We currently don't handle decompression for 3D textures and cubemaps - * correctly. */ - if (tex->desc.b.b.target != PIPE_TEXTURE_1D && - tex->desc.b.b.target != PIPE_TEXTURE_2D && - tex->desc.b.b.target != PIPE_TEXTURE_RECT) - return; - - /* Cannot flush zmask of 16-bit zbuffers. */ - if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16) - return; - - if (tex->zmask_mem[level]) - return; - - zsize = tex->desc.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->desc.b.b.format); - - /* each zmask dword represents 16 4x4 blocks - which is 256 pixels - or 16 8x8 depending on the gb peq flag = 1024 pixels */ - if (compress == RV350_Z_COMPRESS_88) - bsize = 1024; - - ndw = ALIGN_DIVUP(zsize, bsize); - tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0); - return; -} - -boolean r300_hyperz_init_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - int frag_pipes = r300screen->caps.num_frag_pipes; - - r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes); - if (!r300->zmask_mm) - return FALSE; - - if (r300screen->caps.hiz_ram) { - r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); - if (!r300->hiz_mm) { - u_mmDestroy(r300->zmask_mm); - r300->zmask_mm = NULL; - return FALSE; - } - } - - return TRUE; -} - -void r300_hyperz_destroy_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - - if (r300screen->caps.hiz_ram) { - u_mmDestroy(r300->hiz_mm); - r300->hiz_mm = NULL; - } - - u_mmDestroy(r300->zmask_mm); - r300->zmask_mm = NULL; } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h deleted file mode 100644 index 30a23ec649..0000000000 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2010 Marek Olšák <maraeo@gmail.com> - * - * 
Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_HYPERZ_H -#define R300_HYPERZ_H - -struct r300_context; - -void r300_update_hyperz_state(struct r300_context* r300); - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); -void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress); - -boolean r300_hyperz_init_mm(struct r300_context *r300); -void r300_hyperz_destroy_mm(struct r300_context *r300); -#endif diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 6223e04321..717485f43c 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -57,10 +57,10 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, insert_at_tail(&r300->query_list, q); /* Open up the occlusion query buffer. 
*/ - q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, + q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, q->domain); - q->cs_buffer = r300->rws->buffer_get_cs_handle(r300->rws, q->buffer); + q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf); return (struct pipe_query*)q; } @@ -68,10 +68,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, static void r300_destroy_query(struct pipe_context* pipe, struct pipe_query* query) { - struct r300_context *r300 = r300_context(pipe); struct r300_query* q = r300_query(query); - r300->rws->buffer_reference(r300->rws, &q->buffer, NULL); + r300_winsys_bo_reference(&q->buf, NULL); remove_from_list(q); FREE(query); } @@ -128,16 +127,12 @@ static boolean r300_get_query_result(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_query *q = r300_query(query); - unsigned flags, i; + unsigned i; uint32_t temp, *map; - uint64_t *result = (uint64_t*)vresult; - - if (!q->flushed) - pipe->flush(pipe, 0, NULL); - - flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0); - map = r300->rws->buffer_map(r300->rws, q->buffer, r300->cs, flags); + map = r300->rws->buffer_map(q->buf, r300->cs, + PIPE_TRANSFER_READ | + (!wait ? PIPE_TRANSFER_DONTBLOCK : 0)); if (!map) return FALSE; @@ -148,9 +143,9 @@ static boolean r300_get_query_result(struct pipe_context* pipe, map++; } - r300->rws->buffer_unmap(r300->rws, q->buffer); + r300->rws->buffer_unmap(q->buf); - *result = temp; + *((uint64_t*)vresult) = temp; return TRUE; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 613186e815..bb30b1ab0b 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -467,6 +467,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * * See bug #9871. 
http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view */ +#define R500_VAP_TEX_TO_COLOR_CNTL 0x2218 + #define R300_VAP_CLIP_CNTL 0x221C # define R300_VAP_UCP_ENABLE_0 (1 << 0) # define R300_VAP_UCP_ENABLE_1 (1 << 1) @@ -857,6 +859,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_TX_DIRECTION_HORIZONTAL (0<<27) # define R500_TX_DIRECTION_VERITCAL (1<<27) +#define R500_SU_TEX_WRAP_PS3 0x4114 + /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ #define R300_GA_POINT_S0 0x4200 @@ -2162,14 +2166,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* R4xx extended fragment shader registers. */ #define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */ -# define R400_ADDR0_EXT_RGB_MSB_BIT 0x01 -# define R400_ADDR1_EXT_RGB_MSB_BIT 0x02 -# define R400_ADDR2_EXT_RGB_MSB_BIT 0x04 +# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x)) # define R400_ADDRD_EXT_RGB_MSB_BIT 0x08 -# define R400_ADDR0_EXT_A_MSB_BIT 0x10 -# define R400_ADDR1_EXT_A_MSB_BIT 0x20 -# define R400_ADDR2_EXT_A_MSB_BIT 0x40 +# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4)) # define R400_ADDRD_EXT_A_MSB_BIT 0x80 + #define R400_US_CODE_BANK 0x46b8 # define R400_BANK_SHIFT 0 # define R400_BANK_MASK 0xf @@ -2631,8 +2632,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R300_ZB_BW_CNTL 0x4f1c # define R300_HIZ_DISABLE (0 << 0) # define R300_HIZ_ENABLE (1 << 0) -# define R300_HIZ_MIN (0 << 1) -# define R300_HIZ_MAX (1 << 1) +# define R300_HIZ_MAX (0 << 1) +# define R300_HIZ_MIN (1 << 1) # define R300_FAST_FILL_DISABLE (0 << 2) # define R300_FAST_FILL_ENABLE (1 << 2) # define R300_RD_COMP_DISABLE (0 << 3) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b4197e0352..26594dabe4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -39,7 +39,6 @@ #include "r300_screen_buffer.h" #include "r300_emit.h" #include "r300_reg.h" -#include "r300_state_derived.h" #include <limits.h> @@ -128,16 +127,30 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias) END_CS; } +static void r300_emit_draw_init(struct r300_context *r300, unsigned mode, + unsigned min_index, unsigned max_index) +{ + CS_LOCALS(r300); + + BEGIN_CS(5); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(max_index); + OUT_CS(min_index); + END_CS; +} + /* This function splits the index bias value into two parts: * - buffer_offset: the value that can be safely added to buffer offsets - * in r300_emit_aos (it must yield a positive offset when added to + * in r300_emit_vertex_arrays (it must yield a positive offset when added to * a vertex buffer offset) * - index_offset: the value that must be manually subtracted from indices * in an index buffer to achieve negative offsets. 
*/ static void r300_split_index_bias(struct r300_context *r300, int index_bias, int *buffer_offset, int *index_offset) { - struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer; + struct pipe_vertex_buffer *vb, *vbufs = r300->vbuf_mgr->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; unsigned i, size; int max_neg_bias; @@ -164,10 +177,10 @@ static void r300_split_index_bias(struct r300_context *r300, int index_bias, } enum r300_prepare_flags { - PREP_FIRST_DRAW = (1 << 0), /* call emit_dirty_state and friends? */ + PREP_EMIT_STATES = (1 << 0), /* call emit_dirty_state and friends? */ PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */ - PREP_EMIT_AOS = (1 << 2), /* call emit_aos? */ - PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_aos_swtcl? */ + PREP_EMIT_AOS = (1 << 2), /* call emit_vertex_arrays? */ + PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */ PREP_INDEXED = (1 << 4) /* is this draw_elements? */ }; @@ -180,33 +193,33 @@ enum r300_prepare_flags { * \return TRUE if the CS was flushed */ static boolean r300_reserve_cs_dwords(struct r300_context *r300, - enum r300_prepare_flags flags, - unsigned cs_dwords) + enum r300_prepare_flags flags, + unsigned cs_dwords) { boolean flushed = FALSE; - boolean first_draw = flags & PREP_FIRST_DRAW; - boolean emit_aos = flags & PREP_EMIT_AOS; - boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL; + boolean first_draw = flags & PREP_EMIT_STATES; + boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; + boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; /* Add dirty state, index offset, and AOS. 
*/ if (first_draw) { cs_dwords += r300_get_num_dirty_dwords(r300); - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) cs_dwords += 2; /* emit_index_offset */ - if (emit_aos) - cs_dwords += 55; /* emit_aos */ + if (emit_vertex_arrays) + cs_dwords += 55; /* emit_vertex_arrays */ - if (emit_aos_swtcl) - cs_dwords += 7; /* emit_aos_swtcl */ + if (emit_vertex_arrays_swtcl) + cs_dwords += 7; /* emit_vertex_arrays_swtcl */ } cs_dwords += r300_get_num_cs_end_dwords(r300); /* Reserve requested CS space. */ if (cs_dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { - r300->context.flush(&r300->context, 0, NULL); + r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); flushed = TRUE; } @@ -218,57 +231,55 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, * \param r300 The context. * \param flags See r300_prepare_flags. * \param index_buffer The index buffer to validate. The parameter may be NULL. - * \param aos_offset The offset passed to emit_aos. + * \param buffer_offset The offset passed to emit_vertex_arrays. * \param index_bias The index bias to emit. + * \param instance_id Index of instance to render * \return TRUE if rendering should be skipped */ static boolean r300_emit_states(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, - int aos_offset, - int index_bias) + int buffer_offset, + int index_bias, int instance_id) { - boolean first_draw = flags & PREP_FIRST_DRAW; - boolean emit_aos = flags & PREP_EMIT_AOS; - boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL; + boolean first_draw = flags & PREP_EMIT_STATES; + boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; + boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; boolean indexed = flags & PREP_INDEXED; boolean validate_vbos = flags & PREP_VALIDATE_VBOS; /* Validate buffers and emit dirty state if needed. 
*/ if (first_draw) { - /* upload buffers first */ - if (r300->screen->caps.has_tcl && r300->any_user_vbs) { - r300_upload_user_buffers(r300); - r300->any_user_vbs = false; - } - - if (r300->validate_buffers) { - if (!r300_emit_buffer_validate(r300, validate_vbos, - index_buffer)) { - fprintf(stderr, "r300: CS space validation failed. " - "(not enough memory?) Skipping rendering.\n"); - return FALSE; - } - - /* Consider the validation done only if everything was validated. */ - if (validate_vbos) { - r300->validate_buffers = FALSE; - } + if (!r300_emit_buffer_validate(r300, validate_vbos, + index_buffer)) { + fprintf(stderr, "r300: CS space validation failed. " + "(not enough memory?) Skipping rendering.\n"); + return FALSE; } r300_emit_dirty_state(r300); - if (r300->screen->caps.index_bias_supported) { + if (r300->screen->caps.is_r500) { if (r300->screen->caps.has_tcl) r500_emit_index_bias(r300, index_bias); else r500_emit_index_bias(r300, 0); } - if (emit_aos) - r300_emit_aos(r300, aos_offset, indexed); + if (emit_vertex_arrays && + (r300->vertex_arrays_dirty || + r300->vertex_arrays_indexed != indexed || + r300->vertex_arrays_offset != buffer_offset || + r300->vertex_arrays_instance_id != instance_id)) { + r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id); + + r300->vertex_arrays_dirty = FALSE; + r300->vertex_arrays_indexed = indexed; + r300->vertex_arrays_offset = buffer_offset; + r300->vertex_arrays_instance_id = instance_id; + } - if (emit_aos_swtcl) - r300_emit_aos_swtcl(r300, indexed); + if (emit_vertex_arrays_swtcl) + r300_emit_vertex_arrays_swtcl(r300, indexed); } return TRUE; @@ -281,28 +292,32 @@ static boolean r300_emit_states(struct r300_context *r300, * \param flags See r300_prepare_flags. * \param index_buffer The index buffer to validate. The parameter may be NULL. * \param cs_dwords The number of dwords to reserve in CS. - * \param aos_offset The offset passed to emit_aos. 
+ * \param buffer_offset The offset passed to emit_vertex_arrays. * \param index_bias The index bias to emit. + * \param instance_id The instance to render. * \return TRUE if rendering should be skipped */ static boolean r300_prepare_for_rendering(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, unsigned cs_dwords, - int aos_offset, - int index_bias) + int buffer_offset, + int index_bias, + int instance_id) { + /* Make sure there is enough space in the command stream and emit states. */ if (r300_reserve_cs_dwords(r300, flags, cs_dwords)) - flags |= PREP_FIRST_DRAW; + flags |= PREP_EMIT_STATES; - return r300_emit_states(r300, flags, index_buffer, aos_offset, index_bias); + return r300_emit_states(r300, flags, index_buffer, buffer_offset, + index_bias, instance_id); } static boolean immd_is_good_idea(struct r300_context *r300, unsigned count) { struct pipe_vertex_element* velem; - struct pipe_vertex_buffer* vbuf; + struct pipe_resource *buf; boolean checked[PIPE_MAX_ATTRIBS] = {0}; unsigned vertex_element_count = r300->velems->count; unsigned i, vbi; @@ -326,18 +341,12 @@ static boolean immd_is_good_idea(struct r300_context *r300, vbi = velem->vertex_buffer_index; if (!checked[vbi]) { - vbuf = &r300->vertex_buffer[vbi]; + buf = r300->vbuf_mgr->real_vertex_buffer[vbi]; - if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) { + if ((r300_resource(buf)->domain != R300_DOMAIN_GTT)) { return FALSE; } - if (r300_buffer_is_referenced(&r300->context, - vbuf->buffer, - R300_REF_CS | R300_REF_HW)) { - /* It's a very bad idea to map it... */ - return FALSE; - } checked[vbi] = TRUE; } } @@ -348,10 +357,8 @@ static boolean immd_is_good_idea(struct r300_context *r300, * The HWTCL draw functions. 
* ****************************************************************************/ -static void r300_emit_draw_arrays_immediate(struct r300_context *r300, - unsigned mode, - unsigned start, - unsigned count) +static void r300_draw_arrays_immediate(struct r300_context *r300, + const struct pipe_draw_info *info) { struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; @@ -362,7 +369,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, unsigned vertex_size = r300->velems->vertex_size_dwords; /* The number of dwords for this draw operation. */ - unsigned dwords = 9 + count * vertex_size; + unsigned dwords = 4 + info->count * vertex_size; /* Size of the vertex element, in dwords. */ unsigned size[PIPE_MAX_ATTRIBS]; @@ -372,47 +379,42 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, unsigned stride[PIPE_MAX_ATTRIBS]; /* Mapped vertex buffers. */ - uint32_t* map[PIPE_MAX_ATTRIBS]; + uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; uint32_t* mapelem[PIPE_MAX_ATTRIBS]; - struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0}; CS_LOCALS(r300); - if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) + if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) return; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; - size[i] = r300->velems->hw_format_size[i] / 4; + size[i] = r300->velems->format_size[i] / 4; vbi = velem->vertex_buffer_index; - vbuf = &r300->vertex_buffer[vbi]; + vbuf = &r300->vbuf_mgr->vertex_buffer[vbi]; stride[i] = vbuf->stride / 4; /* Map the buffer. 
*/ - if (!transfer[vbi]) { - map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, - vbuf->buffer, - PIPE_TRANSFER_READ, - &transfer[vbi]); - map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; + if (!map[vbi]) { + map[vbi] = (uint32_t*)r300->rws->buffer_map( + r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf, + r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED); + map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start; } mapelem[i] = map[vbi] + (velem->src_offset / 4); } + r300_emit_draw_init(r300, info->mode, 0, info->count-1); + BEGIN_CS(dwords); - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | - r300_translate_primitive(mode)); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, info->count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (info->count << 16) | + r300_translate_primitive(info->mode)); /* Emit vertices. */ - for (v = 0; v < count; v++) { + for (v = 0; v < info->count; v++) { for (i = 0; i < vertex_element_count; i++) { OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]); } @@ -423,10 +425,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, for (i = 0; i < vertex_element_count; i++) { vbi = r300->velems->velem[i].vertex_buffer_index; - if (transfer[vbi]) { - vbuf = &r300->vertex_buffer[vbi]; - pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]); - transfer[vbi] = NULL; + if (map[vbi]) { + r300->rws->buffer_unmap(r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf); + map[vbi] = NULL; } } } @@ -444,15 +445,12 @@ static void r300_emit_draw_arrays(struct r300_context *r300, return; } - BEGIN_CS(7 + (alt_num_verts ? 
2 : 0)); + r300_emit_draw_init(r300, mode, 0, count-1); + + BEGIN_CS(2 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300_translate_primitive(mode) | @@ -463,37 +461,53 @@ static void r300_emit_draw_arrays(struct r300_context *r300, static void r300_emit_draw_elements(struct r300_context *r300, struct pipe_resource* indexBuffer, unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, + unsigned min_index, + unsigned max_index, unsigned mode, unsigned start, - unsigned count) + unsigned count, + uint16_t *imm_indices3) { - uint32_t count_dwords; - uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); + uint32_t count_dwords, offset_dwords; boolean alt_num_verts = count > 65535; CS_LOCALS(r300); - if (count >= (1 << 24)) { + if (count >= (1 << 24) || max_index >= (1 << 24)) { fprintf(stderr, "r300: Got a huge number of vertices: %i, " - "refusing to render.\n", count); + "refusing to render (max_index: %i).\n", count, max_index); return; } - maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index); - DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", - count, minIndex, maxIndex); + count, min_index, max_index); + + r300_emit_draw_init(r300, mode, min_index, max_index); + + /* If start is odd, render the first triangle with indices embedded + * in the command stream. This will increase start by 3 and make it + * even. We can then proceed without a fallback. 
*/ + if (indexSize == 2 && (start & 1) && + mode == PIPE_PRIM_TRIANGLES) { + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 2); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (3 << 16) | + R300_VAP_VF_CNTL__PRIM_TRIANGLES); + OUT_CS(imm_indices3[1] << 16 | imm_indices3[0]); + OUT_CS(imm_indices3[2]); + END_CS; - BEGIN_CS(13 + (alt_num_verts ? 2 : 0)); + start += 3; + count -= 3; + if (!count) + return; + } + + offset_dwords = indexSize * start / sizeof(uint32_t); + + BEGIN_CS(8 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(maxIndex); - OUT_CS(minIndex); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (indexSize == 4) { count_dwords = count; @@ -508,68 +522,169 @@ static void r300_emit_draw_elements(struct r300_context *r300, (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); } - /* INDX_BUFFER is a truly special packet3. - * Unlike most other packet3, where the offset is after the count, - * the order is reversed, so the relocation ends up carrying the - * size of the indexbuf instead of the offset. - */ OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); - OUT_CS_BUF_RELOC(indexBuffer, count_dwords, - r300_buffer(indexBuffer)->domain, 0); + OUT_CS(count_dwords); + OUT_CS_RELOC(r300_resource(indexBuffer)); + END_CS; +} + +static void r300_draw_elements_immediate(struct r300_context *r300, + const struct pipe_draw_info *info) +{ + uint8_t *ptr1; + uint16_t *ptr2; + uint32_t *ptr4; + unsigned index_size = r300->index_buffer.index_size; + unsigned i, count_dwords = index_size == 4 ? info->count : + (info->count + 1) / 2; + CS_LOCALS(r300); + + /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. 
*/ + if (!r300_prepare_for_rendering(r300, + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1)) + return; + + r300_emit_draw_init(r300, info->mode, info->min_index, info->max_index); + + BEGIN_CS(2 + count_dwords); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords); + switch (index_size) { + case 1: + ptr1 = r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr1 += info->start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | + r300_translate_primitive(info->mode)); + + if (info->index_bias && !r300->screen->caps.is_r500) { + for (i = 0; i < info->count-1; i += 2) + OUT_CS(((ptr1[i+1] + info->index_bias) << 16) | + (ptr1[i] + info->index_bias)); + + if (info->count & 1) + OUT_CS(ptr1[i] + info->index_bias); + } else { + for (i = 0; i < info->count-1; i += 2) + OUT_CS(((ptr1[i+1]) << 16) | + (ptr1[i] )); + + if (info->count & 1) + OUT_CS(ptr1[i]); + } + break; + + case 2: + ptr2 = (uint16_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr2 += info->start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | + r300_translate_primitive(info->mode)); + + if (info->index_bias && !r300->screen->caps.is_r500) { + for (i = 0; i < info->count-1; i += 2) + OUT_CS(((ptr2[i+1] + info->index_bias) << 16) | + (ptr2[i] + info->index_bias)); + + if (info->count & 1) + OUT_CS(ptr2[i] + info->index_bias); + } else { + OUT_CS_TABLE(ptr2, count_dwords); + } + break; + + case 4: + ptr4 = (uint32_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr4 += info->start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit | + r300_translate_primitive(info->mode)); + + if (info->index_bias && !r300->screen->caps.is_r500) { + for (i = 0; i < info->count; i++) + OUT_CS(ptr4[i] + info->index_bias); + } else { + OUT_CS_TABLE(ptr4, count_dwords); + } + break; + } END_CS; } -/* This is the fast-path 
drawing & emission for HW TCL. */ -static void r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +static void r300_draw_elements(struct r300_context *r300, + const struct pipe_draw_info *info, + int instance_id) { - struct r300_context* r300 = r300_context(pipe); + struct pipe_resource *indexBuffer = r300->index_buffer.buffer; + unsigned indexSize = r300->index_buffer.index_size; struct pipe_resource* orgIndexBuffer = indexBuffer; + unsigned start = info->start; + unsigned count = info->count; boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + count > 65536; unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ - unsigned new_offset; + uint16_t indices3[3]; - if (indexBias && !r300->screen->caps.index_bias_supported) { - r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset); + if (info->index_bias && !r300->screen->caps.is_r500) { + r300_split_index_bias(r300, info->index_bias, &buffer_offset, &index_offset); } r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset, &start, count); - r300_update_derived_state(r300); - r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count, &new_offset); - - start = new_offset; + /* Fallback for misaligned ushort indices. */ + if (indexSize == 2 && (start & 1) && + !r300_resource(indexBuffer)->b.user_ptr) { + /* If we got here, then orgIndexBuffer == indexBuffer. */ + uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->buf, + r300->cs, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED); + + if (info->mode == PIPE_PRIM_TRIANGLES) { + memcpy(indices3, ptr + start, 6); + } else { + /* Copy the mapped index buffer directly to the upload buffer. 
+ * The start index will be aligned simply from the fact that + * every sub-buffer in the upload buffer is aligned. */ + r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, + count, (uint8_t*)ptr); + } + r300->rws->buffer_unmap(r300_resource(orgIndexBuffer)->buf); + } else { + if (r300_resource(indexBuffer)->b.user_ptr) + r300_upload_index_buffer(r300, &indexBuffer, indexSize, + &start, count, + r300_resource(indexBuffer)->b.user_ptr); + } - /* 15 dwords for emit_draw_elements. Give up if the function fails. */ + /* 19 dwords for emit_draw_elements. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, indexBuffer, 15, buffer_offset, indexBias)) + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias, + instance_id)) goto done; if (alt_num_verts || count <= 65535) { - r300_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, mode, start, count); + r300_emit_draw_elements(r300, indexBuffer, indexSize, info->min_index, + info->max_index, info->mode, start, count, + indices3); } else { do { - short_count = MIN2(count, 65534); + if (indexSize == 2 && (start & 1)) + short_count = MIN2(count, 65535); + else + short_count = MIN2(count, 65534); + r300_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, - mode, start, short_count); + info->min_index, info->max_index, + info->mode, start, short_count, indices3); start += short_count; count -= short_count; @@ -578,7 +693,8 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, buffer_offset, indexBias)) + indexBuffer, 19, buffer_offset, info->index_bias, + instance_id)) goto done; } } while (count); @@ -590,107 +706,112 @@ done: } } -static void r300_draw_arrays(struct pipe_context* pipe, 
unsigned mode, - unsigned start, unsigned count) +static void r300_draw_arrays(struct r300_context *r300, + const struct pipe_draw_info *info, + int instance_id) { - struct r300_context* r300 = r300_context(pipe); boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + info->count > 65536; + unsigned start = info->start; + unsigned count = info->count; unsigned short_count; - r300_update_derived_state(r300); + /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ + if (!r300_prepare_for_rendering(r300, + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, + NULL, 9, start, 0, instance_id)) + return; - if (immd_is_good_idea(r300, count)) { - r300_emit_draw_arrays_immediate(r300, mode, start, count); + if (alt_num_verts || count <= 65535) { + r300_emit_draw_arrays(r300, info->mode, count); } else { - /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ - if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0)) - return; + do { + short_count = MIN2(count, 65535); + r300_emit_draw_arrays(r300, info->mode, short_count); - if (alt_num_verts || count <= 65535) { - r300_emit_draw_arrays(r300, mode, count); - } else { - do { - short_count = MIN2(count, 65535); - r300_emit_draw_arrays(r300, mode, short_count); - - start += short_count; - count -= short_count; - - /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ - if (count) { - if (!r300_prepare_for_rendering(r300, - PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0)) - return; - } - } while (count); - } + start += short_count; + count -= short_count; + + /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. 
*/ + if (count) { + if (!r300_prepare_for_rendering(r300, + PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, + start, 0, instance_id)) + return; + } + } while (count); } } +static void r300_draw_arrays_instanced(struct r300_context *r300, + const struct pipe_draw_info *info) +{ + int i; + + for (i = 0; i < info->instance_count; i++) + r300_draw_arrays(r300, info, i); +} + +static void r300_draw_elements_instanced(struct r300_context *r300, + const struct pipe_draw_info *info) +{ + int i; + + for (i = 0; i < info->instance_count; i++) + r300_draw_elements(r300, info, i); +} + static void r300_draw_vbo(struct pipe_context* pipe, - const struct pipe_draw_info *info) + const struct pipe_draw_info *dinfo) { struct r300_context* r300 = r300_context(pipe); - unsigned count = info->count; - boolean translate = FALSE; - boolean indexed = info->indexed && r300->index_buffer.buffer; - unsigned start_indexed = 0; + struct pipe_draw_info info = *dinfo; + boolean buffers_updated, uploader_flushed; - if (r300->skip_rendering) { - return; - } + info.indexed = info.indexed && r300->index_buffer.buffer; - if (!u_trim_pipe_prim(info->mode, &count)) { + if (r300->skip_rendering || + !u_trim_pipe_prim(info.mode, &info.count)) { return; } - /* Index buffer range checking. */ - if (indexed) { - assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); - - /* Compute start for draw_elements, taking the offset into account. */ - start_indexed = - info->start + - (r300->index_buffer.offset / r300->index_buffer.index_size); - - if ((start_indexed + count) * r300->index_buffer.index_size > - r300->index_buffer.buffer->width0) { - fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n"); - return; - } - } + r300_update_derived_state(r300); - /* Set up fallback for incompatible vertex layout if needed. 
*/ - if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { - r300_begin_vertex_translate(r300); - translate = TRUE; + /* Start the vbuf manager and update buffers if needed. */ + u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info, + &buffers_updated, &uploader_flushed); + if (buffers_updated) { + r300->vertex_arrays_dirty = TRUE; } - if (indexed) { - r300_draw_range_elements(pipe, - r300->index_buffer.buffer, - r300->index_buffer.index_size, - info->index_bias, - info->min_index, - info->max_index, - info->mode, - start_indexed, - count); + /* Draw. */ + if (info.indexed) { + info.start += r300->index_buffer.offset; + info.max_index = MIN2(r300->vbuf_mgr->max_index, info.max_index); + + if (info.instance_count <= 1) { + if (info.count <= 8 && + r300_resource(r300->index_buffer.buffer)->b.user_ptr) { + r300_draw_elements_immediate(r300, &info); + } else { + r300_draw_elements(r300, &info, -1); + } + } else { + r300_draw_elements_instanced(r300, &info); + } } else { - r300_draw_arrays(pipe, - info->mode, - info->start, - count); + if (info.instance_count <= 1) { + if (immd_is_good_idea(r300, info.count)) { + r300_draw_arrays_immediate(r300, &info); + } else { + r300_draw_arrays(r300, &info, -1); + } + } else { + r300_draw_arrays_instanced(r300, &info); + } } - if (translate) { - r300_end_vertex_translate(r300); - } + u_vbuf_mgr_draw_end(r300->vbuf_mgr); } /**************************************************************************** @@ -721,15 +842,16 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, r300_update_derived_state(r300); r300_reserve_cs_dwords(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | (indexed ? PREP_INDEXED : 0), indexed ? 
256 : 6); - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (r300->vertex_buffer[i].buffer) { + for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) { + if (r300->vbuf_mgr->vertex_buffer[i].buffer) { void *buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, + r300->vbuf_mgr->vertex_buffer[i].buffer, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, &vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, buf); } @@ -737,7 +859,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, if (indexed) { indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, - PIPE_TRANSFER_READ, &ib_transfer); + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, &ib_transfer); } draw_set_mapped_index_buffer(r300->draw, indices); @@ -748,16 +871,15 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, draw_flush(r300->draw); r300->draw_vbo_locked = FALSE; - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (r300->vertex_buffer[i].buffer) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); + for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) { + if (r300->vbuf_mgr->vertex_buffer[i].buffer) { + pipe_buffer_unmap(pipe, vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } } if (indexed) { - pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer); + pipe_buffer_unmap(pipe, ib_transfer); draw_set_mapped_index_buffer(r300->draw, NULL); } } @@ -813,10 +935,10 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, pipe_resource_reference(&r300->vbo, NULL); r300->vbo = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, R300_MAX_DRAW_VBO_SIZE); r300->draw_vbo_offset = 0; r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE; - r300->validate_buffers = TRUE; } r300render->vertex_size = vertex_size; @@ -835,7 +957,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render) r300render->vbo_ptr = 
pipe_buffer_map(&r300render->r300->context, r300->vbo, - PIPE_TRANSFER_WRITE, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED, &r300render->vbo_transfer); assert(r300render->vbo_ptr); @@ -857,7 +980,7 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, r300render->vbo_max_used = MAX2(r300render->vbo_max_used, r300render->vertex_size * (max + 1)); - pipe_buffer_unmap(context, r300->vbo, r300render->vbo_transfer); + pipe_buffer_unmap(context, r300render->vbo_transfer); r300render->vbo_transfer = NULL; } @@ -901,33 +1024,16 @@ static void r300_render_draw_arrays(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, 6, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, + NULL, dwords, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, + NULL, 0, 0, -1)) return; } - /* Uncomment to dump all VBOs rendered through this interface. - * Slow and noisy! 
- ptr = pipe_buffer_map(&r300render->r300->context, - r300render->vbo, PIPE_TRANSFER_READ, - &r300render->vbo_transfer); - - for (i = 0; i < count; i++) { - printf("r300: Vertex %d\n", i); - draw_dump_emitted_vertex(&r300->vertex_info, ptr); - ptr += r300->vertex_info.size * 4; - printf("\n"); - } - - pipe_buffer_unmap(&r300render->r300->context, r300render->vbo, - r300render->vbo_transfer); - */ - BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, r300render->prim)); @@ -958,13 +1064,13 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + NULL, 256, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + NULL, 0, 0, -1)) return; } @@ -1001,7 +1107,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0)) + NULL, 256, 0, 0, -1)) return; end_cs_dwords = r300_get_num_cs_end_dwords(r300); @@ -1022,8 +1128,7 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300) r300render->r300 = r300; - /* XXX find real numbers plz */ - r300render->base.max_vertex_buffer_bytes = 128 * 1024; + r300render->base.max_vertex_buffer_bytes = 1024 * 1024; r300render->base.max_indices = 16 * 1024; r300render->base.get_vertex_info = r300_render_get_vertex_info; @@ -1106,7 +1211,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, r300->clip_state.dirty = FALSE; r300->viewport_state.dirty = FALSE; - if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) + if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, 
-1)) goto done; DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); @@ -1190,7 +1295,7 @@ static void r300_resource_resolve(struct pipe_context* pipe, aa->aaresolve_ctl = R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; - r300->aa_state.size = 12; + r300->aa_state.size = 10; r300_mark_atom_dirty(r300, &r300->aa_state); /* Resolve the surface. */ diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index 41a43b04de..f8c7558f4b 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -20,203 +20,64 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** - * The functions below translate vertex and index buffers to the layout - * compatible with the hardware, so that all vertex and index fetches are - * DWORD-aligned and all used vertex and index formats are supported. - * For indices, an optional index offset is added to each index. - */ - #include "r300_context.h" -#include "translate/translate.h" #include "util/u_index_modify.h" +#include "util/u_upload_mgr.h" -void r300_begin_vertex_translate(struct r300_context *r300) -{ - struct pipe_context *pipe = &r300->context; - struct translate_key key = {0}; - struct translate_element *te; - unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; - struct translate *tr; - struct r300_vertex_element_state *ve = r300->velems; - boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; - struct pipe_resource *out_buffer; - unsigned i, num_verts; - - /* Initialize the translate key, i.e. the recipe how vertices should be - * translated. 
*/ - for (i = 0; i < ve->count; i++) { - struct pipe_vertex_buffer *vb = - &r300->vertex_buffer[ve->velem[i].vertex_buffer_index]; - enum pipe_format output_format = ve->hw_format[i]; - unsigned output_format_size = ve->hw_format_size[i]; - - /* Check for support. */ - if (ve->velem[i].src_format == ve->hw_format[i] && - (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 && - vb->stride % 4 == 0) { - continue; - } - - /* Workaround for translate: output floats instead of halfs. */ - switch (output_format) { - case PIPE_FORMAT_R16_FLOAT: - output_format = PIPE_FORMAT_R32_FLOAT; - output_format_size = 4; - break; - case PIPE_FORMAT_R16G16_FLOAT: - output_format = PIPE_FORMAT_R32G32_FLOAT; - output_format_size = 8; - break; - case PIPE_FORMAT_R16G16B16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - output_format_size = 12; - break; - case PIPE_FORMAT_R16G16B16A16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - output_format_size = 16; - break; - default:; - } - - /* Add this vertex element. */ - te = &key.element[key.nr_elements]; - /*te->type; - te->instance_divisor;*/ - te->input_buffer = ve->velem[i].vertex_buffer_index; - te->input_format = ve->velem[i].src_format; - te->input_offset = vb->buffer_offset + ve->velem[i].src_offset; - te->output_format = output_format; - te->output_offset = key.output_stride; - - key.output_stride += output_format_size; - vb_translated[ve->velem[i].vertex_buffer_index] = TRUE; - tr_elem_index[i] = key.nr_elements; - key.nr_elements++; - } - - /* Get a translate object. */ - tr = translate_cache_find(r300->tran.translate_cache, &key); - - /* Map buffers we want to translate. 
*/ - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (vb_translated[i]) { - struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; - - vb_map[i] = pipe_buffer_map(pipe, vb->buffer, - PIPE_TRANSFER_READ, &vb_transfer[i]); - - tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); - } - } - - /* Create and map the output buffer. */ - num_verts = r300->vertex_buffer_max_index + 1; - - out_buffer = pipe_buffer_create(&r300->screen->screen, - PIPE_BIND_VERTEX_BUFFER, - key.output_stride * num_verts); - - out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, - &out_transfer); - - /* Translate. */ - tr->run(tr, 0, num_verts, 0, out_map); - - /* Unmap all buffers. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (vb_translated[i]) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); - } - } - - pipe_buffer_unmap(pipe, out_buffer, out_transfer); - - /* Setup the new vertex buffer in the first free slot. */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; - - if (!vb->buffer) { - pipe_resource_reference(&vb->buffer, out_buffer); - vb->buffer_offset = 0; - vb->max_index = num_verts - 1; - vb->stride = key.output_stride; - r300->tran.vb_slot = i; - r300->validate_buffers = TRUE; - break; - } - } - - /* Save and replace vertex elements. 
*/ - { - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - - r300->tran.saved_velems = r300->velems; - - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->velem[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->velem[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = r300->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->velem[i], - sizeof(struct pipe_vertex_element)); - } - } - - r300->tran.new_velems = - pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems); - } - - pipe_resource_reference(&out_buffer, NULL); -} - -void r300_end_vertex_translate(struct r300_context *r300) -{ - struct pipe_context *pipe = &r300->context; - - /* Restore vertex elements. */ - pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems); - pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems); - - /* Delete the now-unused VBO. 
*/ - pipe_resource_reference(&r300->vertex_buffer[r300->tran.vb_slot].buffer, - NULL); -} void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, unsigned *start, unsigned count) { + struct pipe_resource *out_buffer = NULL; + unsigned out_offset; + void *ptr; + boolean flushed; + switch (*index_size) { - case 1: - util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count); - *index_size = 2; - *start = 0; - r300->validate_buffers = TRUE; - break; + case 1: + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_shorten_ubyte_elts_to_userptr( + &r300->context, *index_buffer, index_offset, + *start, count, ptr); + + *index_buffer = NULL; + pipe_resource_reference(index_buffer, out_buffer); + *index_size = 2; + *start = out_offset / 2; + break; + + case 2: + if (index_offset) { + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_rebuild_ushort_elts_to_userptr(&r300->context, *index_buffer, + index_offset, *start, + count, ptr); + + *index_buffer = NULL; + pipe_resource_reference(index_buffer, out_buffer); + *start = out_offset / 2; + } + break; - case 2: - if (*start % 2 != 0 || index_offset) { - util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count); - *start = 0; - r300->validate_buffers = TRUE; - } - break; + case 4: + if (index_offset) { + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 4, + &out_offset, &out_buffer, &flushed, &ptr); - case 4: - if (index_offset) { - util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count); - *start = 0; - r300->validate_buffers = TRUE; - } - break; + util_rebuild_uint_elts_to_userptr(&r300->context, *index_buffer, + index_offset, *start, + count, ptr); + + *index_buffer = NULL; + pipe_resource_reference(index_buffer, out_buffer); + *start = out_offset / 
4; + } + break; } } diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c index dd1df97059..6593474b1f 100644 --- a/src/gallium/drivers/r300/r300_resource.c +++ b/src/gallium/drivers/r300/r300_resource.c @@ -38,26 +38,14 @@ r300_resource_create(struct pipe_screen *screen, } -static struct pipe_resource * -r300_resource_from_handle(struct pipe_screen * screen, - const struct pipe_resource *templ, - struct winsys_handle *whandle) -{ - if (templ->target == PIPE_BUFFER) - return NULL; - else - return r300_texture_from_handle(screen, templ, whandle); -} - void r300_init_resource_functions(struct r300_context *r300) { r300->context.get_transfer = u_get_transfer_vtbl; r300->context.transfer_map = u_transfer_map_vtbl; - r300->context.transfer_flush_region = u_transfer_flush_region_vtbl; + r300->context.transfer_flush_region = u_default_transfer_flush_region; r300->context.transfer_unmap = u_transfer_unmap_vtbl; r300->context.transfer_destroy = u_transfer_destroy_vtbl; r300->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r300->context.is_resource_referenced = u_is_resource_referenced_vtbl; r300->context.create_surface = r300_create_surface; r300->context.surface_destroy = r300_surface_destroy; } @@ -65,8 +53,8 @@ void r300_init_resource_functions(struct r300_context *r300) void r300_init_screen_resource_functions(struct r300_screen *r300screen) { r300screen->screen.resource_create = r300_resource_create; - r300screen->screen.resource_from_handle = r300_resource_from_handle; - r300screen->screen.resource_get_handle = u_resource_get_handle_vtbl; + r300screen->screen.resource_from_handle = r300_texture_from_handle; + r300screen->screen.resource_get_handle = r300_resource_get_handle; r300screen->screen.resource_destroy = u_resource_destroy_vtbl; r300screen->screen.user_buffer_create = r300_user_buffer_create; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 
921d6f1e67..8a69628c53 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -24,6 +24,7 @@ #include "util/u_format.h" #include "util/u_format_s3tc.h" #include "util/u_memory.h" +#include "os/os_time.h" #include "r300_context.h" #include "r300_texture.h" @@ -83,12 +84,8 @@ static const char* r300_get_name(struct pipe_screen* pscreen) static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { struct r300_screen* r300screen = r300_screen(pscreen); - boolean is_r400 = r300screen->caps.is_r400; boolean is_r500 = r300screen->caps.is_r500; - /* XXX extended shader capabilities of r400 unimplemented */ - is_r400 = FALSE; - switch (param) { /* Supported features (boolean caps). */ case PIPE_CAP_NPOT_TEXTURES: @@ -116,6 +113,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; case PIPE_CAP_TEXTURE_SWIZZLE: return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1; @@ -125,12 +123,17 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DUAL_SOURCE_BLEND: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: - case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ + case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_ARRAY_TEXTURES: + return 0; + + /* SWTCL-only features. */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: - return 0; + case PIPE_CAP_TGSI_INSTANCEID: + return !r300screen->caps.has_tcl; /* Texturing. 
*/ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: @@ -171,9 +174,6 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e boolean is_r400 = r300screen->caps.is_r400; boolean is_r500 = r300screen->caps.is_r500; - /* XXX extended shader capabilities of r400 unimplemented */ - is_r400 = FALSE; - switch (shader) { case PIPE_SHADER_FRAGMENT: @@ -306,10 +306,11 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned usage, - unsigned geom_flags) + unsigned usage) { + struct r300_winsys_screen *rws = r300_screen(screen)->rws; uint32_t retval = 0; + boolean drm_2_8_0 = rws->get_value(rws, R300_VID_DRM_2_8_0); boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || @@ -317,9 +318,13 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_B10G10R10A2_UNORM || format == PIPE_FORMAT_R10SG10SB10SA2U_NORM; boolean is_ati1n = format == PIPE_FORMAT_RGTC1_UNORM || - format == PIPE_FORMAT_RGTC1_SNORM; + format == PIPE_FORMAT_RGTC1_SNORM || + format == PIPE_FORMAT_LATC1_UNORM || + format == PIPE_FORMAT_LATC1_SNORM; boolean is_ati2n = format == PIPE_FORMAT_RGTC2_UNORM || - format == PIPE_FORMAT_RGTC2_SNORM; + format == PIPE_FORMAT_RGTC2_SNORM || + format == PIPE_FORMAT_LATC2_UNORM || + format == PIPE_FORMAT_LATC2_SNORM; boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT || format == PIPE_FORMAT_R16G16_FLOAT || format == PIPE_FORMAT_R16G16B16_FLOAT || @@ -363,7 +368,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) && /* 2101010 cannot be rendered to on non-r5xx. 
*/ - (is_r500 || !is_color2101010) && + (!is_color2101010 || (is_r500 && drm_2_8_0)) && r300_is_colorbuffer_format_supported(format)) { retval |= usage & (PIPE_BIND_RENDER_TARGET | @@ -401,6 +406,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); util_slab_destroy(&r300screen->pool_buffers); + pipe_mutex_destroy(r300screen->num_contexts_mutex); if (rws) rws->destroy(rws); @@ -412,33 +418,44 @@ static void r300_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - struct r300_fence **oldf = (struct r300_fence**)ptr; - struct r300_fence *newf = (struct r300_fence*)fence; - - if (pipe_reference(&(*oldf)->reference, &newf->reference)) - FREE(*oldf); - - *ptr = fence; + r300_winsys_bo_reference((struct r300_winsys_bo**)ptr, + (struct r300_winsys_bo*)fence); } -static int r300_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) +static boolean r300_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence) { - struct r300_fence *rfence = (struct r300_fence*)fence; + struct r300_winsys_screen *rws = r300_screen(screen)->rws; + struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; - return rfence->signalled ? 
0 : 1; /* 0 == success */ + return !rws->buffer_is_busy(rfence); } -static int r300_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) +static boolean r300_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + uint64_t timeout) { - struct r300_fence *rfence = (struct r300_fence*)fence; + struct r300_winsys_screen *rws = r300_screen(screen)->rws; + struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; + + if (timeout != PIPE_TIMEOUT_INFINITE) { + int64_t start_time = os_time_get(); - r300_finish(rfence->ctx); - rfence->signalled = TRUE; - return 0; /* 0 == success */ + /* Convert to microseconds. */ + timeout /= 1000; + + /* Wait in a loop. */ + while (rws->buffer_is_busy(rfence)) { + if (os_time_get() - start_time >= timeout) { + return FALSE; + } + os_time_sleep(10); + } + return TRUE; + } + + rws->buffer_wait(rfence); + return TRUE; } struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) @@ -457,12 +474,15 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) r300_init_debug(r300screen); r300_parse_chipset(&r300screen->caps); - r300screen->caps.index_bias_supported = - r300screen->caps.is_r500 && - rws->get_value(rws, R300_VID_DRM_2_3_0); + if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK)) + r300screen->caps.zmask_ram = 0; + if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) + r300screen->caps.hiz_ram = 0; + + pipe_mutex_init(r300screen->num_contexts_mutex); util_slab_create(&r300screen->pool_buffers, - sizeof(struct r300_buffer), 64, + sizeof(struct r300_resource), 64, UTIL_SLAB_SINGLETHREADED); r300screen->rws = rws; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 5847fe1ffc..576f9c1f4a 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -52,6 +52,7 @@ struct r300_screen { /* The number of created contexts to know whether we have multiple * contexts or not. 
*/ int num_contexts; + pipe_mutex num_contexts_mutex; }; @@ -93,6 +94,8 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_CBZB (1 << 11) #define DBG_HYPERZ (1 << 12) #define DBG_SCISSOR (1 << 13) +#define DBG_UPLOAD (1 << 14) +#define DBG_INFO (1 << 15) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) @@ -100,6 +103,8 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FAKE_OCC (1 << 19) #define DBG_NO_OPT (1 << 20) #define DBG_NO_CBZB (1 << 21) +#define DBG_NO_ZMASK (1 << 22) +#define DBG_NO_HIZ (1 << 23) /* Statistics. */ #define DBG_P_STAT (1 << 25) /*@}*/ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 4436443522..986ae384fb 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -33,109 +33,36 @@ #include "r300_screen_buffer.h" #include "r300_winsys.h" -unsigned r300_buffer_is_referenced(struct pipe_context *context, - struct pipe_resource *buf, - enum r300_reference_domain domain) +void r300_upload_index_buffer(struct r300_context *r300, + struct pipe_resource **index_buffer, + unsigned index_size, unsigned *start, + unsigned count, uint8_t *ptr) { - struct r300_context *r300 = r300_context(context); - struct r300_buffer *rbuf = r300_buffer(buf); + unsigned index_offset; + boolean flushed; - if (r300_buffer_is_user_buffer(buf)) - return PIPE_UNREFERENCED; + *index_buffer = NULL; - if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, domain)) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + u_upload_data(r300->vbuf_mgr->uploader, + 0, count * index_size, + ptr + (*start * index_size), + &index_offset, + index_buffer, &flushed); - return PIPE_UNREFERENCED; -} - -static unsigned r300_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer) -{ - return r300_buffer_is_referenced(context, buf, R300_REF_CS); 
-} - -/* External helper, not required to implent u_resource_vtbl: - */ -int r300_upload_index_buffer(struct r300_context *r300, - struct pipe_resource **index_buffer, - unsigned index_size, - unsigned start, - unsigned count, - unsigned *out_offset) -{ - struct pipe_resource *upload_buffer = NULL; - unsigned index_offset = start * index_size; - int ret = 0; - - if (r300_buffer_is_user_buffer(*index_buffer)) { - ret = u_upload_buffer(r300->upload_ib, - index_offset, - count * index_size, - *index_buffer, - &index_offset, - &upload_buffer); - if (ret) { - goto done; - } - *index_buffer = upload_buffer; - *out_offset = index_offset / index_size; - } else - *out_offset = start; - - done: - // if (upload_buffer) - // pipe_resource_reference(&upload_buffer, NULL); - return ret; -} - -/* External helper, not required to implement u_resource_vtbl: - */ -int r300_upload_user_buffers(struct r300_context *r300) -{ - enum pipe_error ret = PIPE_OK; - int i, nr; - - nr = r300->velems->count; - - for (i = 0; i < nr; i++) { - struct pipe_vertex_buffer *vb = - &r300->vertex_buffer[r300->velems->velem[i].vertex_buffer_index]; - - if (r300_buffer_is_user_buffer(vb->buffer)) { - struct pipe_resource *upload_buffer = NULL; - unsigned offset = 0; /*vb->buffer_offset * 4;*/ - unsigned size = vb->buffer->width0; - unsigned upload_offset; - ret = u_upload_buffer(r300->upload_vb, - offset, size, - vb->buffer, - &upload_offset, &upload_buffer); - if (ret) - return ret; - - pipe_resource_reference(&vb->buffer, NULL); - vb->buffer = upload_buffer; - vb->buffer_offset = upload_offset; - r300->validate_buffers = TRUE; - } - } - return ret; + *start = index_offset / index_size; } static void r300_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf = r300_buffer(buf); - struct r300_winsys_screen *rws = r300screen->rws; + struct r300_resource *rbuf = r300_resource(buf); if (rbuf->constant_buffer) 
FREE(rbuf->constant_buffer); if (rbuf->buf) - rws->buffer_reference(rws, &rbuf->buf, NULL); + r300_winsys_bo_reference(&rbuf->buf, NULL); util_slab_free(&r300screen->pool_buffers, rbuf); } @@ -179,93 +106,31 @@ r300_buffer_transfer_map( struct pipe_context *pipe, struct r300_context *r300 = r300_context(pipe); struct r300_screen *r300screen = r300_screen(pipe->screen); struct r300_winsys_screen *rws = r300screen->rws; - struct r300_buffer *rbuf = r300_buffer(transfer->resource); + struct r300_resource *rbuf = r300_resource(transfer->resource); uint8_t *map; - boolean flush = FALSE; - unsigned i; - if (rbuf->user_buffer) - return (uint8_t *) rbuf->user_buffer + transfer->box.x; + if (rbuf->b.user_ptr) + return (uint8_t *) rbuf->b.user_ptr + transfer->box.x; if (rbuf->constant_buffer) return (uint8_t *) rbuf->constant_buffer + transfer->box.x; - /* check if the mapping is to a range we already flushed */ - if (transfer->usage & PIPE_TRANSFER_DISCARD) { - for (i = 0; i < rbuf->num_ranges; i++) { - if ((transfer->box.x >= rbuf->ranges[i].start) && - (transfer->box.x < rbuf->ranges[i].end)) - flush = TRUE; - - if (flush) { - /* unreference this hw buffer and allocate a new one */ - rws->buffer_reference(rws, &rbuf->buf, NULL); - - rbuf->num_ranges = 0; - rbuf->buf = - r300screen->rws->buffer_create(r300screen->rws, - rbuf->b.b.width0, 16, - rbuf->b.b.bind, - rbuf->b.b.usage, - rbuf->domain); - rbuf->cs_buf = - r300screen->rws->buffer_get_cs_handle(r300screen->rws, - rbuf->buf); - break; - } - } - } - - map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage); + map = rws->buffer_map(rbuf->buf, r300->cs, transfer->usage); if (map == NULL) return NULL; - /* map_buffer() returned a pointer to the beginning of the buffer, - * but transfers are expected to return a pointer to just the - * region specified in the box. 
- */ return map + transfer->box.x; } -static void r300_buffer_transfer_flush_region( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - struct r300_buffer *rbuf = r300_buffer(transfer->resource); - unsigned i; - unsigned offset = transfer->box.x + box->x; - unsigned length = box->width; - - assert(box->x + box->width <= transfer->box.width); - - if (rbuf->user_buffer) - return; - if (rbuf->constant_buffer) - return; - - /* mark the range as used */ - for(i = 0; i < rbuf->num_ranges; ++i) { - if(offset <= rbuf->ranges[i].end && rbuf->ranges[i].start <= (offset+box->width)) { - rbuf->ranges[i].start = MIN2(rbuf->ranges[i].start, offset); - rbuf->ranges[i].end = MAX2(rbuf->ranges[i].end, (offset+length)); - return; - } - } - - rbuf->ranges[rbuf->num_ranges].start = offset; - rbuf->ranges[rbuf->num_ranges].end = offset+length; - rbuf->num_ranges++; -} - static void r300_buffer_transfer_unmap( struct pipe_context *pipe, struct pipe_transfer *transfer ) { struct r300_screen *r300screen = r300_screen(pipe->screen); struct r300_winsys_screen *rws = r300screen->rws; - struct r300_buffer *rbuf = r300_buffer(transfer->resource); + struct r300_resource *rbuf = r300_resource(transfer->resource); if (rbuf->buf) { - rws->buffer_unmap(rws, rbuf->buf); + rws->buffer_unmap(rbuf->buf); } } @@ -278,34 +143,33 @@ static void r300_buffer_transfer_inline_write(struct pipe_context *pipe, unsigned stride, unsigned layer_stride) { - struct r300_buffer *rbuf = r300_buffer(resource); - struct pipe_transfer *transfer = NULL; + struct r300_context *r300 = r300_context(pipe); + struct r300_winsys_screen *rws = r300->screen->rws; + struct r300_resource *rbuf = r300_resource(resource); uint8_t *map = NULL; if (rbuf->constant_buffer) { memcpy(rbuf->constant_buffer + box->x, data, box->width); return; } + assert(rbuf->b.user_ptr == NULL); - transfer = r300_buffer_get_transfer(pipe, resource, 0, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, box); - map = 
r300_buffer_transfer_map(pipe, transfer); + map = rws->buffer_map(rbuf->buf, r300->cs, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage); - memcpy(map, data, box->width); + memcpy(map + box->x, data, box->width); - r300_buffer_transfer_unmap(pipe, transfer); - r300_buffer_transfer_destroy(pipe, transfer); + rws->buffer_unmap(rbuf->buf); } -struct u_resource_vtbl r300_buffer_vtbl = +static const struct u_resource_vtbl r300_buffer_vtbl = { - u_default_resource_get_handle, /* get_handle */ + NULL, /* get_handle */ r300_buffer_destroy, /* resource_destroy */ - r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */ r300_buffer_get_transfer, /* get_transfer */ r300_buffer_transfer_destroy, /* transfer_destroy */ r300_buffer_transfer_map, /* transfer_map */ - r300_buffer_transfer_flush_region, /* transfer_flush_region */ + NULL, /* transfer_flush_region */ r300_buffer_transfer_unmap, /* transfer_unmap */ r300_buffer_transfer_inline_write /* transfer_inline_write */ }; @@ -314,73 +178,68 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf; + struct r300_resource *rbuf; unsigned alignment = 16; rbuf = util_slab_alloc(&r300screen->pool_buffers); - rbuf->magic = R300_BUFFER_MAGIC; - - rbuf->b.b = *templ; - rbuf->b.vtbl = &r300_buffer_vtbl; - pipe_reference_init(&rbuf->b.b.reference, 1); - rbuf->b.b.screen = screen; + rbuf->b.b.b = *templ; + rbuf->b.b.vtbl = &r300_buffer_vtbl; + pipe_reference_init(&rbuf->b.b.b.reference, 1); + rbuf->b.b.b.screen = screen; + rbuf->b.user_ptr = NULL; rbuf->domain = R300_DOMAIN_GTT; - rbuf->num_ranges = 0; rbuf->buf = NULL; + rbuf->buf_size = templ->width0; rbuf->constant_buffer = NULL; - rbuf->user_buffer = NULL; /* Alloc constant buffers in RAM. 
*/ if (templ->bind & PIPE_BIND_CONSTANT_BUFFER) { rbuf->constant_buffer = MALLOC(templ->width0); - return &rbuf->b.b; + return &rbuf->b.b.b; } rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, - rbuf->b.b.width0, alignment, - rbuf->b.b.bind, rbuf->b.b.usage, + rbuf->b.b.b.width0, alignment, + rbuf->b.b.b.bind, rbuf->b.b.b.usage, rbuf->domain); - rbuf->cs_buf = - r300screen->rws->buffer_get_cs_handle(r300screen->rws, rbuf->buf); - if (!rbuf->buf) { util_slab_free(&r300screen->pool_buffers, rbuf); return NULL; } - return &rbuf->b.b; + rbuf->cs_buf = + r300screen->rws->buffer_get_cs_handle(rbuf->buf); + + return &rbuf->b.b.b; } struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, + void *ptr, unsigned size, unsigned bind) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf; + struct r300_resource *rbuf; rbuf = util_slab_alloc(&r300screen->pool_buffers); - rbuf->magic = R300_BUFFER_MAGIC; - - pipe_reference_init(&rbuf->b.b.reference, 1); - rbuf->b.vtbl = &r300_buffer_vtbl; - rbuf->b.b.screen = screen; - rbuf->b.b.target = PIPE_BUFFER; - rbuf->b.b.format = PIPE_FORMAT_R8_UNORM; - rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE; - rbuf->b.b.bind = bind; - rbuf->b.b.width0 = bytes; - rbuf->b.b.height0 = 1; - rbuf->b.b.depth0 = 1; - rbuf->b.b.array_size = 1; - rbuf->b.b.flags = 0; + pipe_reference_init(&rbuf->b.b.b.reference, 1); + rbuf->b.b.b.screen = screen; + rbuf->b.b.b.target = PIPE_BUFFER; + rbuf->b.b.b.format = PIPE_FORMAT_R8_UNORM; + rbuf->b.b.b.usage = PIPE_USAGE_IMMUTABLE; + rbuf->b.b.b.bind = bind; + rbuf->b.b.b.width0 = ~0; + rbuf->b.b.b.height0 = 1; + rbuf->b.b.b.depth0 = 1; + rbuf->b.b.b.array_size = 1; + rbuf->b.b.b.flags = 0; + rbuf->b.b.vtbl = &r300_buffer_vtbl; + rbuf->b.user_ptr = ptr; rbuf->domain = R300_DOMAIN_GTT; - rbuf->num_ranges = 0; rbuf->buf = NULL; + rbuf->buf_size = size; rbuf->constant_buffer = NULL; - rbuf->user_buffer = ptr; - return &rbuf->b.b; + return 
&rbuf->b.b.b; } diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index 0b3555dd81..cdbc4425fc 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -35,53 +35,19 @@ #include "r300_winsys.h" #include "r300_context.h" -#define R300_BUFFER_MAGIC 0xabcd1234 -#define R300_BUFFER_MAX_RANGES 32 - -struct r300_buffer_range { - uint32_t start; - uint32_t end; -}; - -/* Vertex buffer. */ -struct r300_buffer -{ - struct u_resource b; - - uint32_t magic; - - struct r300_winsys_buffer *buf; - struct r300_winsys_cs_buffer *cs_buf; - - enum r300_buffer_domain domain; - - uint8_t *user_buffer; - uint8_t *constant_buffer; - struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES]; - unsigned num_ranges; -}; - /* Functions. */ -int r300_upload_user_buffers(struct r300_context *r300); - -int r300_upload_index_buffer(struct r300_context *r300, - struct pipe_resource **index_buffer, - unsigned index_size, - unsigned start, - unsigned count, unsigned *out_offset); +void r300_upload_index_buffer(struct r300_context *r300, + struct pipe_resource **index_buffer, + unsigned index_size, unsigned *start, + unsigned count, uint8_t *ptr); struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ); struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, - unsigned usage); - -unsigned r300_buffer_is_referenced(struct pipe_context *context, - struct pipe_resource *buf, - enum r300_reference_domain domain); + void *ptr, unsigned size, + unsigned bind); /* Inline functions. */ @@ -90,9 +56,4 @@ static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer) return (struct r300_buffer *)buffer; } -static INLINE boolean r300_buffer_is_user_buffer(struct pipe_resource *buffer) -{ - return r300_buffer(buffer)->user_buffer ? 
true : false; -} - #endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 7529253240..ecb4fc691c 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -24,10 +24,12 @@ #include "draw/draw_context.h" #include "util/u_framebuffer.h" +#include "util/u_half.h" #include "util/u_math.h" #include "util/u_mm.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -44,7 +46,6 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" -#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -187,12 +188,16 @@ static void* r300_create_blend_state(struct pipe_context* pipe, struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); uint32_t blend_control = 0; /* R300_RB3D_CBLEND: 0x4e04 */ + uint32_t blend_control_noclamp = 0; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */ + uint32_t alpha_blend_control_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */ uint32_t color_channel_mask = 0; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */ uint32_t rop = 0; /* R300_RB3D_ROPCNTL: 0x4e18 */ uint32_t dither = 0; /* R300_RB3D_DITHER_CTL: 0x4e50 */ CB_LOCALS; + blend->state = *state; + if (state->rt[0].blend_enable) { unsigned eqRGB = state->rt[0].rgb_func; @@ -205,10 +210,14 @@ static void* r300_create_blend_state(struct pipe_context* pipe, /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, * this is just the crappy D3D naming */ - blend_control = R300_ALPHA_BLEND_ENABLE | - r300_translate_blend_function(eqRGB) | + blend_control = blend_control_noclamp = + R300_ALPHA_BLEND_ENABLE | ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | ( r300_translate_blend_factor(dstRGB) << 
R300_DST_BLEND_SHIFT); + blend_control |= + r300_translate_blend_function(eqRGB, TRUE); + blend_control_noclamp |= + r300_translate_blend_function(eqRGB, FALSE); /* Optimization: some operations do not require the destination color. * @@ -230,6 +239,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { /* Enable reading from the colorbuffer. */ blend_control |= R300_READ_ENABLE; + blend_control_noclamp |= R300_READ_ENABLE; if (r300screen->caps.is_r500) { /* Optimization: Depending on incoming pixels, we can @@ -305,10 +315,14 @@ static void* r300_create_blend_state(struct pipe_context* pipe, /* separate alpha */ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { blend_control |= R300_SEPARATE_ALPHA_ENABLE; - alpha_blend_control = - r300_translate_blend_function(eqA) | + blend_control_noclamp |= R300_SEPARATE_ALPHA_ENABLE; + alpha_blend_control = alpha_blend_control_noclamp = (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) | (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT); + alpha_blend_control |= + r300_translate_blend_function(eqA, TRUE); + alpha_blend_control_noclamp |= + r300_translate_blend_function(eqA, FALSE); } } @@ -345,7 +359,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, */ /* Build a command buffer. */ - BEGIN_CB(blend->cb, 8); + BEGIN_CB(blend->cb_clamp, 8); OUT_CB_REG(R300_RB3D_ROPCNTL, rop); OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); OUT_CB(blend_control); @@ -354,6 +368,16 @@ static void* r300_create_blend_state(struct pipe_context* pipe, OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); END_CB; + /* Build a command buffer. 
*/ + BEGIN_CB(blend->cb_noclamp, 8); + OUT_CB_REG(R300_RB3D_ROPCNTL, rop); + OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); + OUT_CB(blend_control_noclamp); + OUT_CB(alpha_blend_control_noclamp); + OUT_CB(color_channel_mask); + OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); + END_CB; + /* The same as above, but with no colorbuffer reads and writes. */ BEGIN_CB(blend->cb_no_readwrite, 8); OUT_CB_REG(R300_RB3D_ROPCNTL, rop); @@ -374,6 +398,10 @@ static void r300_bind_blend_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); UPDATE_STATE(state, r300->blend_state); + + if (r300->fs.state && r300_pick_fragment_shader(r300)) { + r300_mark_fs_code_dirty(r300); + } } /* Free blend state. */ @@ -395,22 +423,64 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); - struct r300_blend_color_state* state = + struct pipe_framebuffer_state *fb = r300->fb_state.state; + struct r300_blend_color_state *state = (struct r300_blend_color_state*)r300->blend_color_state.state; + struct pipe_blend_color c; + enum pipe_format format = fb->nr_cbufs ? fb->cbufs[0]->format : 0; CB_LOCALS; + state->state = *color; /* Save it, so that we can reuse it in set_fb_state */ + c = *color; + + /* The blend color is dependent on the colorbuffer format. 
*/ + if (fb->nr_cbufs) { + switch (format) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + c.color[1] = c.color[0]; + break; + + case PIPE_FORMAT_A8_UNORM: + c.color[1] = c.color[3]; + break; + + case PIPE_FORMAT_R8G8_UNORM: + c.color[2] = c.color[1]; + break; + + case PIPE_FORMAT_L8A8_UNORM: + c.color[2] = c.color[3]; + break; + + default:; + } + } + if (r300->screen->caps.is_r500) { - /* XXX if FP16 blending is enabled, we should use the FP16 format */ BEGIN_CB(state->cb, 3); OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); - OUT_CB(float_to_fixed10(color->color[0]) | - (float_to_fixed10(color->color[3]) << 16)); - OUT_CB(float_to_fixed10(color->color[2]) | - (float_to_fixed10(color->color[1]) << 16)); + + switch (format) { + case PIPE_FORMAT_R16G16B16A16_FLOAT: + OUT_CB(util_float_to_half(c.color[2]) | + (util_float_to_half(c.color[3]) << 16)); + OUT_CB(util_float_to_half(c.color[0]) | + (util_float_to_half(c.color[1]) << 16)); + break; + + default: + OUT_CB(float_to_fixed10(c.color[0]) | + (float_to_fixed10(c.color[3]) << 16)); + OUT_CB(float_to_fixed10(c.color[2]) | + (float_to_fixed10(c.color[1]) << 16)); + } + END_CB; } else { union util_color uc; - util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + util_pack_color(c.color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); BEGIN_CB(state->cb, 2); OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui); @@ -442,8 +512,7 @@ static void r300_set_clip_state(struct pipe_context* pipe, OUT_CB_TABLE(state->ucp, state->nr * 4); } OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) | - R300_PS_UCP_MODE_CLIP_AS_TRIFAN | - (state->depth_clamp ? R300_CLIP_DISABLE : 0)); + R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CB; r300_mark_atom_dirty(r300, &r300->clip_state); @@ -538,29 +607,54 @@ static void* r300_translate_alpha_function(state->alpha.func) | R300_FG_ALPHA_FUNC_ENABLE; - /* We could use 10bit alpha ref but who needs that? 
*/ dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value); + dsa->alpha_value = util_float_to_half(state->alpha.ref_value); - if (caps->is_r500) + if (caps->is_r500) { + dsa->alpha_function_fp16 = dsa->alpha_function | + R500_FG_ALPHA_FUNC_FP16_ENABLE; dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT; + } } - BEGIN_CB(&dsa->cb_begin, 8); + BEGIN_CB(&dsa->cb_begin, 10); OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); OUT_CB(dsa->z_buffer_control); OUT_CB(dsa->z_stencil_control); OUT_CB(dsa->stencil_ref_mask); OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); + OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); END_CB; - BEGIN_CB(dsa->cb_no_readwrite, 8); + BEGIN_CB(&dsa->cb_begin_fp16, 10); + OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16); + OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); + OUT_CB(dsa->z_buffer_control); + OUT_CB(dsa->z_stencil_control); + OUT_CB(dsa->stencil_ref_mask); + OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); + OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); + END_CB; + + BEGIN_CB(dsa->cb_zb_no_readwrite, 10); OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); OUT_CB(0); OUT_CB(0); OUT_CB(0); OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0); + OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); + END_CB; + + BEGIN_CB(dsa->cb_fp16_zb_no_readwrite, 10); + OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16); + OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); + OUT_CB(0); + OUT_CB(0); + OUT_CB(0); + OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0); + OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); END_CB; return (void*)dsa; @@ -617,21 +711,16 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, } static void r300_tex_set_tiling_flags(struct r300_context *r300, - struct r300_texture *tex, unsigned level) + struct r300_resource *tex, + unsigned level) { /* Check if the macrotile flag needs to be changed. * Skip changing the flags otherwise. 
*/ - if (tex->desc.macrotile[tex->surface_level] != - tex->desc.macrotile[level]) { - /* Tiling determines how DRM treats the buffer data. - * We must flush CS when changing it if the buffer is referenced. */ - if (r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_CS)) - r300->context.flush(&r300->context, 0, NULL); - - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->desc.microtile, tex->desc.macrotile[level], - tex->desc.stride_in_bytes[0]); + if (tex->tex.macrotile[tex->surface_level] != + tex->tex.macrotile[level]) { + r300->rws->buffer_set_tiling(tex->buf, r300->cs, + tex->tex.microtile, tex->tex.macrotile[level], + tex->tex.stride_in_bytes[0]); tex->surface_level = level; } @@ -646,12 +735,12 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300, /* Set tiling flags for new surfaces. */ for (i = 0; i < state->nr_cbufs; i++) { r300_tex_set_tiling_flags(r300, - r300_texture(state->cbufs[i]->texture), + r300_resource(state->cbufs[i]->texture), state->cbufs[i]->u.tex.level); } if (state->zsbuf) { r300_tex_set_tiling_flags(r300, - r300_texture(state->zsbuf->texture), + r300_resource(state->zsbuf->texture), state->zsbuf->u.tex.level); } } @@ -660,7 +749,7 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, const char *binding) { struct pipe_resource *tex = surf->texture; - struct r300_texture *rtex = r300_texture(tex); + struct r300_resource *rtex = r300_resource(tex); fprintf(stderr, "r300: %s[%i] Dim: %ix%i, Firstlayer: %i, " @@ -673,9 +762,9 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level, util_format_short_name(surf->format), - rtex->desc.macrotile[0] ? "YES" : " NO", - rtex->desc.microtile ? "YES" : " NO", - rtex->desc.stride_in_pixels[0], + rtex->tex.macrotile[0] ? "YES" : " NO", + rtex->tex.microtile ? 
"YES" : " NO", + rtex->tex.stride_in_pixels[0], tex->width0, tex->height0, tex->depth0, tex->last_level, util_format_short_name(tex->format)); } @@ -686,13 +775,23 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, struct pipe_framebuffer_state *state = r300->fb_state.state; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); - /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300_mark_atom_dirty(r300, &r300->gpu_flush); r300_mark_atom_dirty(r300, &r300->fb_state); - r300_mark_atom_dirty(r300, &r300->hyperz_state); + /* What is marked as dirty depends on the enum r300_fb_state_change. */ if (change == R300_CHANGED_FB_STATE) { r300_mark_atom_dirty(r300, &r300->aa_state); + r300_mark_atom_dirty(r300, &r300->dsa_state); /* for AlphaRef */ + r300_set_blend_color(&r300->context, r300->blend_color_state.state); + } + + if (change == R300_CHANGED_FB_STATE || + change == R300_CHANGED_HYPERZ_FLAG) { + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + + if (change == R300_CHANGED_FB_STATE || + change == R300_CHANGED_MULTIWRITE) { r300_mark_atom_dirty(r300, &r300->fb_state_pipelined); } @@ -704,23 +803,21 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, else if (state->zsbuf) { r300->fb_state.size += 10; if (can_hyperz) - r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4; + r300->fb_state.size += 8; } /* The size of the rest of atoms stays the same. 
*/ } static void - r300_set_framebuffer_state(struct pipe_context* pipe, - const struct pipe_framebuffer_state* state) +r300_set_framebuffer_state(struct pipe_context* pipe, + const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; - int blocksize; if (r300->screen->caps.is_r500) { max_width = max_height = 4096; @@ -736,10 +833,37 @@ static void return; } - /* If nr_cbufs is changed from zero to non-zero or vice versa... */ - if (!!old_state->nr_cbufs != !!state->nr_cbufs) { - r300_mark_atom_dirty(r300, &r300->blend_state); + if (old_state->zsbuf && r300->zmask_in_use && !r300->hyperz_locked) { + /* There is a zmask in use, what are we gonna do? */ + if (state->zsbuf) { + if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) { + /* Decompress the currently bound zbuffer before we bind another one. */ + r300_decompress_zmask(r300); + r300->hiz_in_use = FALSE; + } + } else { + /* We don't bind another zbuffer, so lock the current one. */ + r300->hyperz_locked = TRUE; + pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf); + } + } else if (r300->hyperz_locked && r300->locked_zbuffer) { + /* We have a locked zbuffer now, what are we gonna do? */ + if (state->zsbuf) { + if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) { + /* We are binding some other zbuffer, so decompress the locked one, + * it gets unlocked automatically. */ + r300_decompress_zmask_locked_unsafe(r300); + r300->hiz_in_use = FALSE; + } else { + /* We are binding the locked zbuffer again, so unlock it. */ + r300->hyperz_locked = FALSE; + } + } } + + /* Need to reset clamping or colormask. 
*/ + r300_mark_atom_dirty(r300, &r300->blend_state); + /* If zsbuf is set from NULL to non-NULL or vice versa.. */ if (!!old_state->zsbuf != !!state->zsbuf) { r300_mark_atom_dirty(r300, &r300->dsa_state); @@ -750,14 +874,14 @@ static void util_copy_framebuffer_state(r300->fb_state.state, state); + if (!r300->hyperz_locked) { + pipe_surface_reference(&r300->locked_zbuffer, NULL); + } + r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - r300->validate_buffers = TRUE; - r300->z_compression = false; - if (state->zsbuf) { - blocksize = util_format_get_blocksize(state->zsbuf->texture->format); - switch (blocksize) { + switch (util_format_get_blocksize(state->zsbuf->texture->format)) { case 2: zbuffer_bpp = 16; break; @@ -765,31 +889,6 @@ static void zbuffer_bpp = 24; break; } - if (can_hyperz) { - struct r300_surface *zs_surf = r300_surface(state->zsbuf); - struct r300_texture *tex; - int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; - int level = zs_surf->base.u.tex.level; - - tex = r300_texture(zs_surf->base.texture); - - /* work out whether we can support hiz on this buffer */ - r300_hiz_alloc_block(r300, zs_surf); - - /* work out whether we can support zmask features on this buffer */ - r300_zmask_alloc_block(r300, zs_surf, compress); - - if (tex->zmask_mem[level]) { - /* compression causes hangs on 16-bit */ - if (zbuffer_bpp == 24) - r300->z_compression = compress; - } - DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, - tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, - r300->z_compression, tex->zmask_mem[level] ? 1 : 0, - tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); - } /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { @@ -801,27 +900,25 @@ static void } /* Set up AA config. 
*/ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { - aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; - - switch (state->cbufs[0]->texture->nr_samples) { - case 2: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; - break; - case 3: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; - break; - case 4: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; - break; - case 6: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; - break; - } - } else { - aa->aa_config = 0; + if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; + + switch (state->cbufs[0]->texture->nr_samples) { + case 2: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; + break; + case 3: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; + break; + case 4: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; + break; + case 6: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; + break; } + } else { + aa->aa_config = 0; } if (DBG_ON(r300, DBG_FB)) { @@ -876,16 +973,25 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; + struct pipe_framebuffer_state *fb = r300->fb_state.state; + boolean last_multi_write; if (fs == NULL) { r300->fs.state = NULL; return; } + last_multi_write = r300_fragment_shader_writes_all(r300_fs(r300)); + r300->fs.state = fs; r300_pick_fragment_shader(r300); r300_mark_fs_code_dirty(r300); + if (fb->nr_cbufs > 1 && + last_multi_write != r300_fragment_shader_writes_all(fs)) { + r300_mark_fb_state_dirty(r300, R300_CHANGED_MULTIWRITE); + } + r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. 
*/ } @@ -934,12 +1040,14 @@ static void* r300_create_rs_state(struct pipe_context* pipe, uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ + uint32_t round_mode; /* R300_GA_ROUND_MODE: 0x428c */ /* Point sprites texture coordinates, 0: lower left, 1: upper right */ float point_texcoord_left = 0; /* R300_GA_POINT_S0: 0x4200 */ float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */ float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */ float point_texcoord_top = 0; /* R300_GA_POINT_T1: 0x420c */ + boolean vclamp = TRUE; CB_LOCALS; /* Copy rasterizer state. */ @@ -1062,6 +1170,12 @@ static void* r300_create_rs_state(struct pipe_context* pipe, } } + /* Vertex color clamping. FP20 means no clamping. */ + round_mode = + R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST | + (!vclamp ? (R300_GA_ROUND_MODE_RGB_CLAMP_FP20 | + R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20) : 0); + /* Build the main command buffer. 
*/ BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE); OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status); @@ -1076,6 +1190,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe, OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config); OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value); OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode); + OUT_CB_REG(R300_GA_ROUND_MODE, round_mode); OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule); OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4); OUT_CB_32F(point_texcoord_left); @@ -1282,7 +1397,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_textures_state* state = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture *texture; + struct r300_resource *texture; unsigned i, real_num_views = 0, view_index = 0; unsigned tex_units = r300->screen->caps.num_tex_units; boolean dirty_tex = FALSE; @@ -1298,29 +1413,27 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, } for (i = 0; i < count; i++) { - if (&state->sampler_views[i]->base != views[i]) { - pipe_sampler_view_reference( - (struct pipe_sampler_view**)&state->sampler_views[i], - views[i]); + pipe_sampler_view_reference( + (struct pipe_sampler_view**)&state->sampler_views[i], + views[i]); - if (!views[i]) { - continue; - } + if (!views[i]) { + continue; + } - /* A new sampler view (= texture)... */ - dirty_tex = TRUE; + /* A new sampler view (= texture)... */ + dirty_tex = TRUE; - /* Set the texrect factor in the fragment shader. + /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. 
*/ - texture = r300_texture(views[i]->texture); - if (texture->desc.is_npot) { - r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); - } + texture = r300_resource(views[i]->texture); + if (texture->tex.is_npot) { + r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); + } - state->sampler_views[i]->texcache_region = + state->sampler_views[i]->texcache_region = r300_assign_texture_cache_region(view_index, real_num_views); - view_index++; - } + view_index++; } for (i = count; i < tex_units; i++) { @@ -1334,7 +1447,6 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, state->sampler_view_count = count; r300_mark_atom_dirty(r300, &r300->textures_state); - r300->validate_buffers = TRUE; if (dirty_tex) { r300_mark_atom_dirty(r300, &r300->texture_cache_inval); @@ -1347,7 +1459,7 @@ r300_create_sampler_view(struct pipe_context *pipe, const struct pipe_sampler_view *templ) { struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); - struct r300_texture *tex = r300_texture(texture); + struct r300_resource *tex = r300_resource(texture); boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle; @@ -1449,88 +1561,30 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); - struct pipe_vertex_buffer *vbo; - unsigned i, max_index = (1 << 24) - 1; - boolean any_user_buffer = FALSE; + unsigned i; struct pipe_vertex_buffer dummy_vb = {0}; /* There must be at least one vertex buffer set, otherwise it locks up. 
*/ if (!count) { dummy_vb.buffer = r300->dummy_vb; - dummy_vb.max_index = r300->dummy_vb->width0 / 4; buffers = &dummy_vb; count = 1; } - if (count == r300->vertex_buffer_count && - memcmp(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count) == 0) { - return; - } + u_vbuf_mgr_set_vertex_buffers(r300->vbuf_mgr, count, buffers); if (r300->screen->caps.has_tcl) { /* HW TCL. */ - r300->incompatible_vb_layout = FALSE; - - /* Check if the strides and offsets are aligned to the size of DWORD. */ for (i = 0; i < count; i++) { - if (buffers[i].buffer) { - if (buffers[i].stride % 4 != 0 || - buffers[i].buffer_offset % 4 != 0) { - r300->incompatible_vb_layout = TRUE; - break; - } + if (buffers[i].buffer && + !r300_resource(buffers[i].buffer)->b.user_ptr) { } } - - for (i = 0; i < count; i++) { - /* Why, yes, I AM casting away constness. How did you know? */ - vbo = (struct pipe_vertex_buffer*)&buffers[i]; - - /* Skip NULL buffers */ - if (!buffers[i].buffer) { - continue; - } - - if (r300_buffer_is_user_buffer(vbo->buffer)) { - any_user_buffer = TRUE; - } - - if (vbo->max_index == ~0) { - /* if no VBO stride then only one vertex value so max index is 1 */ - /* should think about converting to VS constants like svga does */ - if (!vbo->stride) - vbo->max_index = 1; - else - vbo->max_index = - (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; - } - - max_index = MIN2(vbo->max_index, max_index); - } - - r300->any_user_vbs = any_user_buffer; - r300->vertex_buffer_max_index = max_index; - r300->aos_dirty = TRUE; - r300->validate_buffers = TRUE; + r300->vertex_arrays_dirty = TRUE; } else { /* SW TCL. */ draw_set_vertex_buffers(r300->draw, count, buffers); } - - /* Common code. */ - for (i = 0; i < count; i++) { - /* Reference our buffer. */ - pipe_resource_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer); - } - for (; i < r300->vertex_buffer_count; i++) { - /* Dereference any old buffers. 
*/ - pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - } - - memcpy(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count); - r300->vertex_buffer_count = count; } static void r300_set_index_buffer(struct pipe_context* pipe, @@ -1538,19 +1592,19 @@ static void r300_set_index_buffer(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - if (ib) { + if (ib && ib->buffer) { + assert(ib->offset % ib->index_size == 0); + pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); + r300->index_buffer.offset /= r300->index_buffer.index_size; } else { pipe_resource_reference(&r300->index_buffer.buffer, NULL); memset(&r300->index_buffer, 0, sizeof(r300->index_buffer)); } - if (r300->screen->caps.has_tcl) { - r300->validate_buffers = TRUE; - } - else { + if (!r300->screen->caps.has_tcl) { draw_set_index_buffer(r300->draw, ib); } } @@ -1563,17 +1617,11 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) enum pipe_format format; unsigned i; - if (velems->count > 16) { - fprintf(stderr, "r300: More than 16 vertex elements are not supported," - " requested %i, using 16.\n", velems->count); - velems->count = 16; - } - /* Vertex shaders have no semantics on their inputs, * so PSC should just route stuff based on the vertex elements, * and not on attrib information. 
*/ for (i = 0; i < velems->count; i++) { - format = velems->hw_format[i]; + format = velems->velem[i].src_format; type = r300_translate_vertex_data_type(format); if (type == R300_INVALID_FORMAT) { @@ -1605,16 +1653,13 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) vstream->count = (i >> 1) + 1; } -#define FORMAT_REPLACE(what, withwhat) \ - case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break - static void* r300_create_vertex_elements_state(struct pipe_context* pipe, unsigned count, const struct pipe_vertex_element* attribs) { + struct r300_context *r300 = r300_context(pipe); struct r300_vertex_element_state *velems; unsigned i; - enum pipe_format *format; struct pipe_vertex_element dummy_attrib = {0}; /* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. */ @@ -1622,81 +1667,33 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, dummy_attrib.src_format = PIPE_FORMAT_R8G8B8A8_UNORM; attribs = &dummy_attrib; count = 1; + } else if (count > 16) { + fprintf(stderr, "r300: More than 16 vertex elements are not supported," + " requested %i, using 16.\n", count); + count = 16; } - assert(count <= PIPE_MAX_ATTRIBS); velems = CALLOC_STRUCT(r300_vertex_element_state); - if (velems != NULL) { - velems->count = count; - memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); - - if (r300_screen(pipe->screen)->caps.has_tcl) { - /* Set the best hw format in case the original format is not - * supported by hw. */ - for (i = 0; i < count; i++) { - velems->hw_format[i] = velems->velem[i].src_format; - format = &velems->hw_format[i]; - - /* This is basically the list of unsupported formats. - * For now we don't care about the alignment, that's going to - * be sorted out after the PSC setup. 
*/ - switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_UNORM, R32_FLOAT); - FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_USCALED, R32_FLOAT); - FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_SNORM, R32_FLOAT); - FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_SSCALED, R32_FLOAT); - FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_FIXED, R32_FLOAT); - FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT); - - default:; - } + if (!velems) + return NULL; - velems->incompatible_layout = - velems->incompatible_layout || - velems->velem[i].src_format != velems->hw_format[i] || - velems->velem[i].src_offset % 4 != 0; - } + velems->count = count; + velems->vmgr_elements = + u_vbuf_mgr_create_vertex_elements(r300->vbuf_mgr, count, attribs, + velems->velem); - /* Now setup PSC. - * The unused components will be replaced by (..., 0, 1). */ - r300_vertex_psc(velems); - - /* Align the formats to the size of DWORD. - * We only care about the blocksizes of the formats since - * swizzles are already set up. - * Also compute the vertex size. */ - for (i = 0; i < count; i++) { - /* This is OK because we check for aligned strides too - * elsewhere. 
*/ - velems->hw_format_size[i] = - align(util_format_get_blocksize(velems->hw_format[i]), 4); - velems->vertex_size_dwords += velems->hw_format_size[i] / 4; - } + if (r300_screen(pipe->screen)->caps.has_tcl) { + /* Setup PSC. + * The unused components will be replaced by (..., 0, 1). */ + r300_vertex_psc(velems); + + for (i = 0; i < count; i++) { + velems->format_size[i] = + align(util_format_get_blocksize(velems->velem[i].src_format), 4); + velems->vertex_size_dwords += velems->format_size[i] / 4; } } + return velems; } @@ -1712,6 +1709,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, r300->velems = velems; + u_vbuf_mgr_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements); + if (r300->draw) { draw_set_vertex_elements(r300->draw, velems->count, velems->velem); return; @@ -1719,12 +1718,16 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state); r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2; - r300->aos_dirty = TRUE; + r300->vertex_arrays_dirty = TRUE; } static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state) { - FREE(state); + struct r300_context *r300 = r300_context(pipe); + struct r300_vertex_element_state *velems = state; + + u_vbuf_mgr_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements); + FREE(state); } static void* r300_create_vs_state(struct pipe_context* pipe, @@ -1811,6 +1814,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_constant_buffer *cbuf; + struct r300_resource *rbuf = r300_resource(buf); uint32_t *mapped; switch (shader) { @@ -1824,14 +1828,18 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, return; } - if (buf == NULL || buf->width0 == 0 || - (mapped = (uint32_t*)r300_buffer(buf)->constant_buffer) == NULL) { + if (buf == NULL || buf->width0 == 0) + return; + + if 
(rbuf->b.user_ptr) + mapped = (uint32_t*)rbuf->b.user_ptr; + else if (rbuf->constant_buffer) + mapped = (uint32_t*)rbuf->constant_buffer; + else return; - } if (shader == PIPE_SHADER_FRAGMENT || (shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) { - assert((buf->width0 % (4 * sizeof(float))) == 0); cbuf->ptr = mapped; } @@ -1862,6 +1870,14 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, } } +static void r300_texture_barrier(struct pipe_context *pipe) +{ + struct r300_context *r300 = r300_context(pipe); + + r300_mark_atom_dirty(r300, &r300->gpu_flush); + r300_mark_atom_dirty(r300, &r300->texture_cache_inval); +} + void r300_init_state_functions(struct r300_context* r300) { r300->context.create_blend_state = r300_create_blend_state; @@ -1908,6 +1924,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_vertex_buffers = r300_set_vertex_buffers; r300->context.set_index_buffer = r300_set_index_buffer; + r300->context.redefine_user_buffer = u_default_redefine_user_buffer; r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; @@ -1916,4 +1933,6 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.create_vs_state = r300_create_vs_state; r300->context.bind_vs_state = r300_bind_vs_state; r300->context.delete_vs_state = r300_delete_vs_state; + + r300->context.texture_barrier = r300_texture_barrier; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index d5fc8ece25..ec00e2552c 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -29,10 +29,8 @@ #include "r300_context.h" #include "r300_fs.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" -#include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_texture.h" #include 
"r300_vs.h" @@ -490,7 +488,8 @@ static void r300_update_rs_block(struct r300_context *r300) for (; i < ATTR_GENERIC_COUNT; i++) { if (fs_inputs->generic[i] != ATTR_UNUSED) { fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, " - "not enough hardware slots.\n", i); + "not enough hardware slots (it's not a bug, do not " + "report it).\n", i); } } @@ -525,7 +524,8 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n"); } else { fprintf(stderr, "r300: ERROR: FS input fog unassigned, " - "not enough hardware slots.\n"); + "not enough hardware slots. (it's not a bug, " + "do not report it)\n"); } } } @@ -552,7 +552,8 @@ static void r300_update_rs_block(struct r300_context *r300) } else { if (fs_inputs->wpos != ATTR_UNUSED && tex_count >= 8) { fprintf(stderr, "r300: ERROR: FS input WPOS unassigned, " - "not enough hardware slots.\n"); + "not enough hardware slots. (it's not a bug, do not " + "report it)\n"); } } @@ -640,11 +641,36 @@ static uint32_t r300_get_border_color(enum pipe_format format, /* Compressed formats. */ if (util_format_is_compressed(format)) { - util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); - return uc.ui; + switch (format) { + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + /* Add 1/32 to round the border color instead of truncating. */ + /* The Y component is used for the border color. 
*/ + border_swizzled[1] = border_swizzled[2] + 1.0f/32; + util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); + return uc.ui; + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_SNORM: + border_swizzled[0] = border_swizzled[2]; + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); + return uc.ui; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_LATC2_UNORM: + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + return uc.ui; + default: + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + return uc.ui; + } } switch (desc->channel[0].size) { + case 2: + util_pack_color(border_swizzled, PIPE_FORMAT_B2G3R3_UNORM, &uc); + break; + case 4: util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); break; @@ -671,7 +697,20 @@ static uint32_t r300_get_border_color(enum pipe_format format, case 16: if (desc->nr_channels <= 2) { border_swizzled[0] = border_swizzled[2]; - util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); + if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { + util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_FLOAT, &uc); + } else { + util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); + } + } else { + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + } + break; + + case 32: + if (desc->nr_channels == 1) { + border_swizzled[0] = border_swizzled[2]; + util_pack_color(border_swizzled, PIPE_FORMAT_R32_FLOAT, &uc); } else { util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); } @@ -681,6 +720,25 @@ static uint32_t r300_get_border_color(enum pipe_format format, return uc.ui; } +static boolean util_format_is_float(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + + if (!format) + return FALSE; + + /* Find the first non-void channel. 
*/ + for (i = 0; i < 4; i++) + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + + if (i == 4) + return FALSE; + + return desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT ? TRUE : FALSE; +} + static void r300_merge_textures_and_samplers(struct r300_context* r300) { struct r300_textures_state *state = @@ -688,7 +746,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) struct r300_texture_sampler_state *texstate; struct r300_sampler_state *sampler; struct r300_sampler_view *view; - struct r300_texture *tex; + struct r300_resource *tex; unsigned min_level, max_level, i, j, size; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); @@ -706,7 +764,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) state->tx_enable |= 1 << i; view = state->sampler_views[i]; - tex = r300_texture(view->base.texture); + tex = r300_resource(view->base.texture); sampler = state->sampler_states[i]; texstate = &state->regs[i]; @@ -722,32 +780,37 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* determine min/max levels */ max_level = MIN3(sampler->max_lod + view->base.u.tex.first_level, - tex->desc.b.b.last_level, view->base.u.tex.last_level); + tex->b.b.b.last_level, view->base.u.tex.last_level); min_level = MIN2(sampler->min_lod + view->base.u.tex.first_level, max_level); - if (tex->desc.is_npot && min_level > 0) { + if (tex->tex.is_npot && min_level > 0) { /* Even though we do not implement mipmapping for NPOT * textures, we should at least honor the minimum level * which is allowed to be displayed. We do this by setting up - * an i-th mipmap level as the zero level. */ - r300_texture_setup_format_state(r300->screen, &tex->desc, + * the i-th mipmap level as the zero level. 
*/ + unsigned offset = tex->tex_offset + + tex->tex.offset_in_bytes[min_level]; + + r300_texture_setup_format_state(r300->screen, tex, min_level, &texstate->format); - texstate->format.tile_config |= - tex->desc.offset_in_bytes[min_level] & 0xffffffe0; - assert((tex->desc.offset_in_bytes[min_level] & 0x1f) == 0); + texstate->format.tile_config |= offset & 0xffffffe0; + assert((offset & 0x1f) == 0); + } else { + texstate->format.tile_config |= tex->tex_offset & 0xffffffe0; + assert((tex->tex_offset & 0x1f) == 0); } /* Assign a texture cache region. */ texstate->format.format1 |= view->texcache_region; /* Depth textures are kinda special. */ - if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + if (util_format_is_depth_or_stencil(tex->b.b.b.format)) { unsigned char depth_swizzle[4]; if (!r300->screen->caps.is_r500 && - util_format_get_blocksizebits(tex->desc.b.b.format) == 32) { + util_format_get_blocksizebits(tex->b.b.b.format) == 32) { /* X24x8 is sampled as Y16X16 on r3xx-r4xx. * The depth here is at the Y component. */ for (j = 0; j < 4; j++) @@ -772,17 +835,17 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } if (r300->screen->caps.dxtc_swizzle && - util_format_is_compressed(tex->desc.b.b.format)) { + util_format_is_compressed(tex->b.b.b.format)) { texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE; } /* to emulate 1D textures through 2D ones correctly */ - if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { + if (tex->b.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->desc.is_npot) { + if (tex->tex.is_npot) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. 
*/ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; @@ -811,6 +874,32 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); } + /* Float textures only support nearest and mip-nearest filtering. */ + if (util_format_is_float(tex->b.b.b.format)) { + /* No MAG linear filtering. */ + if ((texstate->filter0 & R300_TX_MAG_FILTER_MASK) == + R300_TX_MAG_FILTER_LINEAR) { + texstate->filter0 &= ~R300_TX_MAG_FILTER_MASK; + texstate->filter0 |= R300_TX_MAG_FILTER_NEAREST; + } + /* No MIN linear filtering. */ + if ((texstate->filter0 & R300_TX_MIN_FILTER_MASK) == + R300_TX_MIN_FILTER_LINEAR) { + texstate->filter0 &= ~R300_TX_MIN_FILTER_MASK; + texstate->filter0 |= R300_TX_MIN_FILTER_NEAREST; + } + /* No mipmap linear filtering. */ + if ((texstate->filter0 & R300_TX_MIN_FILTER_MIP_MASK) == + R300_TX_MIN_FILTER_MIP_LINEAR) { + texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; + texstate->filter0 |= R300_TX_MIN_FILTER_MIP_NEAREST; + } + /* No anisotropic filtering. */ + texstate->filter0 &= ~R300_TX_MAX_ANISO_MASK; + texstate->filter1 &= ~R500_TX_MAX_ANISO_MASK; + texstate->filter1 &= ~R500_TX_ANISO_HIGH_QUALITY; + } + texstate->filter0 |= i << 28; size += 16; @@ -859,44 +948,35 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } -/* We can't use compressed zbuffers as samplers. 
*/ -static void r300_flush_depth_textures(struct r300_context *r300) +static void r300_decompress_depth_textures(struct r300_context *r300) { struct r300_textures_state *state = (struct r300_textures_state*)r300->textures_state.state; - unsigned i, level; + struct pipe_resource *tex; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); + unsigned i; - if (r300->z_decomp_rd) + if (!r300->hyperz_locked || !r300->locked_zbuffer) { return; + } - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { if (state->sampler_views[i] && state->sampler_states[i]) { - struct pipe_resource *tex = state->sampler_views[i]->base.texture; - - if (tex->target == PIPE_TEXTURE_3D || - tex->target == PIPE_TEXTURE_CUBE) - continue; - - /* Ignore non-depth textures. - * Also ignore reinterpreted depth textures, e.g. resource_copy. */ - if (!util_format_is_depth_or_stencil(tex->format)) - continue; - - for (level = 0; level <= tex->last_level; level++) - if (r300_texture(tex)->zmask_in_use[level]) { - /* We don't handle 3D textures and cubemaps yet. 
*/ - r300_flush_depth_stencil(&r300->context, tex, level, 0); - } + tex = state->sampler_views[i]->base.texture; + + if (tex == r300->locked_zbuffer->texture) { + r300_decompress_zmask_locked(r300); + return; + } } + } } void r300_update_derived_state(struct r300_context* r300) { - r300_flush_depth_textures(r300); - if (r300->textures_state.dirty) { + r300_decompress_depth_textures(r300); r300_merge_textures_and_samplers(r300); } diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h deleted file mode 100644 index 71a4a47b00..0000000000 --- a/src/gallium/drivers/r300/r300_state_derived.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef R300_STATE_DERIVED_H -#define R300_STATE_DERIVED_H - -struct r300_context; - -void r300_update_derived_state(struct r300_context* r300); - -#endif /* R300_STATE_DERIVED_H */ diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 7e501221b1..54dae1acd9 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -25,13 +25,9 @@ #define R300_STATE_INLINES_H #include "draw/draw_vertex.h" - #include "pipe/p_format.h" - #include "util/u_format.h" - #include "r300_reg.h" - #include <stdio.h> /* Some maths. These should probably find their way to u_math, if needed. */ @@ -42,23 +38,24 @@ static INLINE int pack_float_16_6x(float f) { /* Blend state. */ -static INLINE uint32_t r300_translate_blend_function(int blend_func) +static INLINE uint32_t r300_translate_blend_function(int blend_func, + boolean clamp) { switch (blend_func) { - case PIPE_BLEND_ADD: - return R300_COMB_FCN_ADD_CLAMP; - case PIPE_BLEND_SUBTRACT: - return R300_COMB_FCN_SUB_CLAMP; - case PIPE_BLEND_REVERSE_SUBTRACT: - return R300_COMB_FCN_RSUB_CLAMP; - case PIPE_BLEND_MIN: - return R300_COMB_FCN_MIN; - case PIPE_BLEND_MAX: - return R300_COMB_FCN_MAX; - default: - fprintf(stderr, "r300: Unknown blend function %d\n", blend_func); - assert(0); - break; + case PIPE_BLEND_ADD: + return clamp ? R300_COMB_FCN_ADD_CLAMP : R300_COMB_FCN_ADD_NOCLAMP; + case PIPE_BLEND_SUBTRACT: + return clamp ? R300_COMB_FCN_SUB_CLAMP : R300_COMB_FCN_SUB_NOCLAMP; + case PIPE_BLEND_REVERSE_SUBTRACT: + return clamp ? R300_COMB_FCN_RSUB_CLAMP : R300_COMB_FCN_RSUB_NOCLAMP; + case PIPE_BLEND_MIN: + return R300_COMB_FCN_MIN; + case PIPE_BLEND_MAX: + return R300_COMB_FCN_MAX; + default: + fprintf(stderr, "r300: Unknown blend function %d\n", blend_func); + assert(0); + break; } return 0; } @@ -341,24 +338,6 @@ static INLINE uint32_t r500_anisotropy(unsigned max_aniso) R500_TX_ANISO_HIGH_QUALITY; } -/* Non-CSO state. 
(For now.) */ - -static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) -{ - switch (pipe_count) { - case 1: - return R300_GB_TILE_PIPE_COUNT_RV300; - case 2: - return R300_GB_TILE_PIPE_COUNT_R300; - case 3: - return R300_GB_TILE_PIPE_COUNT_R420_3P; - case 4: - return R300_GB_TILE_PIPE_COUNT_R420; - } - return 0; -} - - /* Translate pipe_formats into PSC vertex types. */ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 70fc5d96d8..c650fb7ed3 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -171,8 +171,18 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, - util_format_is_compressed(format) && dxtc_swizzle); + if (util_format_is_compressed(format) && + dxtc_swizzle && + format != PIPE_FORMAT_RGTC2_UNORM && + format != PIPE_FORMAT_RGTC2_SNORM && + format != PIPE_FORMAT_LATC2_UNORM && + format != PIPE_FORMAT_LATC2_SNORM) { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + TRUE); + } else { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + FALSE); + } /* S3TC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { @@ -197,10 +207,25 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - /* Add sign. */ - for (i = 0; i < desc->nr_channels; i++) { - if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - result |= sign_bit[i]; + /* RGTC formats. 
*/ + if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + switch (format) { + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_SNORM: + result |= sign_bit[1]; + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_RGTC1_UNORM: + return R500_TX_FORMAT_ATI1N | result; + + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_SNORM: + result |= sign_bit[2] | sign_bit[3]; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_LATC2_UNORM: + return R400_TX_FORMAT_ATI2N | result; + + default: + return ~0; /* Unsupported/unknown. */ } } @@ -211,17 +236,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_CxV8U8 | result; } - /* RGTC formats. */ - if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { - switch (format) { - case PIPE_FORMAT_RGTC1_UNORM: - case PIPE_FORMAT_RGTC1_SNORM: - return R500_TX_FORMAT_ATI1N | result; - case PIPE_FORMAT_RGTC2_UNORM: - case PIPE_FORMAT_RGTC2_SNORM: - return R400_TX_FORMAT_ATI2N | result; - default: - return ~0; /* Unsupported/unknown. */ + /* Add sign. */ + for (i = 0; i < desc->nr_channels; i++) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= sign_bit[i]; } } @@ -244,6 +262,11 @@ uint32_t r300_translate_texformat(enum pipe_format format, desc->channel[2].size == 6) { return R300_TX_FORMAT_Z6Y5X5 | result; } + if (desc->channel[0].size == 2 && + desc->channel[1].size == 3 && + desc->channel[2].size == 3) { + return R300_TX_FORMAT_Z3Y3X2 | result; + } return ~0; /* Unsupported/unknown. */ case 4: @@ -348,6 +371,8 @@ uint32_t r500_tx_format_msb_bit(enum pipe_format format) switch (format) { case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return R500_TXFORMAT_MSB; @@ -365,14 +390,18 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) switch (format) { /* 8-bit buffers. 
*/ case PIPE_FORMAT_A8_UNORM: + /*case PIPE_FORMAT_A8_SNORM:*/ case PIPE_FORMAT_I8_UNORM: + /*case PIPE_FORMAT_I8_SNORM:*/ case PIPE_FORMAT_L8_UNORM: + /*case PIPE_FORMAT_L8_SNORM:*/ case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return R300_COLOR_FORMAT_I8; /* 16-bit buffers. */ case PIPE_FORMAT_L8A8_UNORM: + /*case PIPE_FORMAT_L8A8_SNORM:*/ case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: return R300_COLOR_FORMAT_UV88; @@ -390,13 +419,21 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /* 32-bit buffers. */ case PIPE_FORMAT_B8G8R8A8_UNORM: + /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ case PIPE_FORMAT_B8G8R8X8_UNORM: + /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ case PIPE_FORMAT_A8R8G8B8_UNORM: + /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ case PIPE_FORMAT_X8R8G8B8_UNORM: + /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ case PIPE_FORMAT_A8B8G8R8_UNORM: + /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: case PIPE_FORMAT_X8B8G8R8_UNORM: + /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ case PIPE_FORMAT_R8G8B8X8_UNORM: + /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ case PIPE_FORMAT_R8SG8SB8UX8U_NORM: return R300_COLOR_FORMAT_ARGB8888; @@ -481,6 +518,8 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) } else { if (desc->channel[i].size == 16) { modifier |= R300_US_OUT_FMT_C4_16; + } else if (desc->channel[i].size == 10) { + modifier |= R300_US_OUT_FMT_C4_10; } else { /* C4_8 seems to be used for the formats whose pixel size * is <= 32 bits. */ @@ -499,9 +538,12 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* 8-bit outputs, one channel. * COLORFORMAT_I8 stores the C2 component. 
*/ case PIPE_FORMAT_A8_UNORM: + /*case PIPE_FORMAT_A8_SNORM:*/ return modifier | R300_C2_SEL_A; case PIPE_FORMAT_I8_UNORM: + /*case PIPE_FORMAT_I8_SNORM:*/ case PIPE_FORMAT_L8_UNORM: + /*case PIPE_FORMAT_L8_SNORM:*/ case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return modifier | R300_C2_SEL_R; @@ -509,6 +551,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* 16-bit outputs, two channels. * COLORFORMAT_UV88 stores C2 and C0. */ case PIPE_FORMAT_L8A8_UNORM: + /*case PIPE_FORMAT_L8A8_SNORM:*/ return modifier | R300_C0_SEL_A | R300_C2_SEL_R; case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: @@ -521,7 +564,9 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM: + /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ case PIPE_FORMAT_B8G8R8X8_UNORM: + /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ case PIPE_FORMAT_B10G10R10A2_UNORM: return modifier | R300_C0_SEL_B | R300_C1_SEL_G | @@ -529,20 +574,26 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* ARGB outputs. */ case PIPE_FORMAT_A8R8G8B8_UNORM: + /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ case PIPE_FORMAT_X8R8G8B8_UNORM: + /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ return modifier | R300_C0_SEL_A | R300_C1_SEL_R | R300_C2_SEL_G | R300_C3_SEL_B; /* ABGR outputs. */ case PIPE_FORMAT_A8B8G8R8_UNORM: + /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ case PIPE_FORMAT_X8B8G8R8_UNORM: + /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ return modifier | R300_C0_SEL_A | R300_C1_SEL_B | R300_C2_SEL_G | R300_C3_SEL_R; /* RGBA outputs. 
*/ case PIPE_FORMAT_R8G8B8X8_UNORM: + /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: case PIPE_FORMAT_R8SG8SB8UX8U_NORM: case PIPE_FORMAT_R10G10B10A2_UNORM: @@ -578,11 +629,12 @@ boolean r300_is_sampler_format_supported(enum pipe_format format) } void r300_texture_setup_format_state(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level, struct r300_texture_format_state *out) { - struct pipe_resource *pt = &desc->b.b; + struct pipe_resource *pt = &tex->b.b.b; + struct r300_texture_desc *desc = &tex->tex; boolean is_r500 = screen->caps.is_r500; /* Mask out all the fields we change. */ @@ -625,163 +677,147 @@ void r300_texture_setup_format_state(struct r300_screen *screen, R300_TXO_MICRO_TILE(desc->microtile); } -static void r300_texture_setup_fb_state(struct r300_screen* screen, - struct r300_texture* tex) +static void r300_texture_setup_fb_state(struct r300_surface *surf) { - unsigned i; + struct r300_resource *tex = r300_resource(surf->base.texture); + unsigned level = surf->base.u.tex.level; /* Set framebuffer state. 
*/ - if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { - for (i = 0; i <= tex->desc.b.b.last_level; i++) { - tex->fb_state.pitch[i] = - tex->desc.stride_in_pixels[i] | - R300_DEPTHMACROTILE(tex->desc.macrotile[i]) | - R300_DEPTHMICROTILE(tex->desc.microtile); - } - tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format); + if (util_format_is_depth_or_stencil(surf->base.format)) { + surf->pitch = + tex->tex.stride_in_pixels[level] | + R300_DEPTHMACROTILE(tex->tex.macrotile[level]) | + R300_DEPTHMICROTILE(tex->tex.microtile); + surf->format = r300_translate_zsformat(surf->base.format); + surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level]; + surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level]; } else { - for (i = 0; i <= tex->desc.b.b.last_level; i++) { - tex->fb_state.pitch[i] = - tex->desc.stride_in_pixels[i] | - r300_translate_colorformat(tex->desc.b.b.format) | - R300_COLOR_TILE(tex->desc.macrotile[i]) | - R300_COLOR_MICROTILE(tex->desc.microtile); - } - tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format); + surf->pitch = + tex->tex.stride_in_pixels[level] | + r300_translate_colorformat(surf->base.format) | + R300_COLOR_TILE(tex->tex.macrotile[level]) | + R300_COLOR_MICROTILE(tex->tex.microtile); + surf->format = r300_translate_out_fmt(surf->base.format); } } -void r300_texture_reinterpret_format(struct pipe_screen *screen, +boolean r300_resource_set_properties(struct pipe_screen *screen, struct pipe_resource *tex, - enum pipe_format new_format) + unsigned offset, + const struct pipe_resource *new_properties) { - struct r300_screen *r300screen = r300_screen(screen); + struct r300_screen *rscreen = r300_screen(screen); + struct r300_resource *res = r300_resource(tex); - SCREEN_DBG(r300screen, DBG_TEX, - "r300: texture_reinterpret_format: %s -> %s\n", + SCREEN_DBG(rscreen, DBG_TEX, + "r300: texture_set_properties: %s -> %s\n", util_format_short_name(tex->format), - util_format_short_name(new_format)); - - 
tex->format = new_format; - - r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex)); -} - -static unsigned r300_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned level, int layer) -{ - struct r300_context *r300 = r300_context(context); - struct r300_texture *rtex = (struct r300_texture *)texture; + util_format_short_name(new_properties->format)); - if (r300->rws->cs_is_buffer_referenced(r300->cs, - rtex->cs_buffer, R300_REF_CS)) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + if (!r300_texture_desc_init(rscreen, res, new_properties)) { + fprintf(stderr, "r300: ERROR: Cannot set texture properties.\n"); + return FALSE; + } + res->tex_offset = offset; + r300_texture_setup_format_state(rscreen, res, 0, &res->tx_format); - return PIPE_UNREFERENCED; + return TRUE; } static void r300_texture_destroy(struct pipe_screen *screen, struct pipe_resource* texture) { - struct r300_texture* tex = (struct r300_texture*)texture; - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; - int i; - - rws->buffer_reference(rws, &tex->buffer, NULL); - for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { - if (tex->hiz_mem[i]) - u_mmFreeMem(tex->hiz_mem[i]); - if (tex->zmask_mem[i]) - u_mmFreeMem(tex->zmask_mem[i]); - } + struct r300_resource* tex = (struct r300_resource*)texture; + r300_winsys_bo_reference(&tex->buf, NULL); FREE(tex); } -static boolean r300_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *texture, - struct winsys_handle *whandle) +boolean r300_resource_get_handle(struct pipe_screen* screen, + struct pipe_resource *texture, + struct winsys_handle *whandle) { struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_resource* tex = (struct r300_resource*)texture; if (!tex) { return FALSE; } - return rws->buffer_get_handle(rws, tex->buffer, - 
tex->desc.stride_in_bytes[0], whandle); + return rws->buffer_get_handle(tex->buf, + tex->tex.stride_in_bytes[0], whandle); } -struct u_resource_vtbl r300_texture_vtbl = +static const struct u_resource_vtbl r300_texture_vtbl = { - r300_texture_get_handle, /* get_handle */ - r300_texture_destroy, /* resource_destroy */ - r300_texture_is_referenced, /* is_resource_referenced */ - r300_texture_get_transfer, /* get_transfer */ - r300_texture_transfer_destroy, /* transfer_destroy */ - r300_texture_transfer_map, /* transfer_map */ - u_default_transfer_flush_region, /* transfer_flush_region */ - r300_texture_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + NULL, /* get_handle */ + r300_texture_destroy, /* resource_destroy */ + r300_texture_get_transfer, /* get_transfer */ + r300_texture_transfer_destroy, /* transfer_destroy */ + r300_texture_transfer_map, /* transfer_map */ + NULL, /* transfer_flush_region */ + r300_texture_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ }; /* The common texture constructor. */ -static struct r300_texture* +static struct r300_resource* r300_texture_create_object(struct r300_screen *rscreen, const struct pipe_resource *base, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, unsigned stride_in_bytes_override, unsigned max_buffer_size, - struct r300_winsys_buffer *buffer) + struct r300_winsys_bo *buffer) { struct r300_winsys_screen *rws = rscreen->rws; - struct r300_texture *tex = CALLOC_STRUCT(r300_texture); + struct r300_resource *tex = CALLOC_STRUCT(r300_resource); if (!tex) { if (buffer) - rws->buffer_reference(rws, &buffer, NULL); + r300_winsys_bo_reference(&buffer, NULL); return NULL; } - /* Initialize the descriptor. 
*/ - if (!r300_texture_desc_init(rscreen, &tex->desc, base, - microtile, macrotile, - stride_in_bytes_override, - max_buffer_size)) { + pipe_reference_init(&tex->b.b.b.reference, 1); + tex->b.b.b.screen = &rscreen->screen; + tex->b.b.b.usage = base->usage; + tex->b.b.b.bind = base->bind; + tex->b.b.b.flags = base->flags; + tex->b.b.vtbl = &r300_texture_vtbl; + tex->tex.microtile = microtile; + tex->tex.macrotile[0] = macrotile; + tex->tex.stride_in_bytes_override = stride_in_bytes_override; + tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? + R300_DOMAIN_GTT : + R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + tex->buf_size = max_buffer_size; + + if (!r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, 0, base)) { if (buffer) - rws->buffer_reference(rws, &buffer, NULL); + r300_winsys_bo_reference(&buffer, NULL); FREE(tex); return NULL; } - /* Initialize the hardware state. */ - r300_texture_setup_format_state(rscreen, &tex->desc, 0, &tex->tx_format); - r300_texture_setup_fb_state(rscreen, tex); - - tex->desc.b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->desc.b.b.reference, 1); - tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? - R300_DOMAIN_GTT : - R300_DOMAIN_VRAM | R300_DOMAIN_GTT; - tex->buffer = buffer; /* Create the backing buffer if needed. 
*/ - if (!tex->buffer) { - tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, + if (!buffer) { + tex->buf_size = tex->tex.size_in_bytes; + tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, base->bind, base->usage, tex->domain); - if (!tex->buffer) { + if (!tex->buf) { FREE(tex); return NULL; } + } else { + tex->buf = buffer; } - tex->cs_buffer = rws->buffer_get_cs_handle(rws, tex->buffer); + tex->cs_buf = rws->buffer_get_cs_handle(tex->buf); - rws->buffer_set_tiling(rws, tex->buffer, - tex->desc.microtile, tex->desc.macrotile[0], - tex->desc.stride_in_bytes[0]); + rws->buffer_set_tiling(tex->buf, NULL, + tex->tex.microtile, tex->tex.macrotile[0], + tex->tex.stride_in_bytes[0]); return tex; } @@ -813,7 +849,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, { struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; struct r300_screen *rscreen = r300_screen(screen); - struct r300_winsys_buffer *buffer; + struct r300_winsys_bo *buffer; enum r300_buffer_tiling microtile, macrotile; unsigned stride, size; @@ -829,7 +865,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, if (!buffer) return NULL; - rws->buffer_get_tiling(rws, buffer, &microtile, &macrotile); + rws->buffer_get_tiling(buffer, &microtile, &macrotile); /* Enforce a microtiled zbuffer. 
*/ if (util_format_is_depth_or_stencil(base->format) && @@ -840,8 +876,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) - microtile = R300_BUFFER_SQUARETILED; + microtile = R300_BUFFER_SQUARETILED; break; } } @@ -857,7 +892,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, struct pipe_resource* texture, const struct pipe_surface *surf_tmpl) { - struct r300_texture* tex = r300_texture(texture); + struct r300_resource* tex = r300_resource(texture); struct r300_surface* surface = CALLOC_STRUCT(r300_surface); unsigned level = surf_tmpl->u.tex.level; @@ -877,29 +912,28 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; - surface->buffer = tex->buffer; - surface->cs_buffer = tex->cs_buffer; + surface->buf = tex->buf; + surface->cs_buf = tex->cs_buf; /* Prefer VRAM if there are multiple domains to choose from. */ surface->domain = tex->domain; if (surface->domain & R300_DOMAIN_VRAM) surface->domain &= ~R300_DOMAIN_GTT; - surface->offset = r300_texture_get_offset(&tex->desc, level, + surface->offset = r300_texture_get_offset(tex, level, surf_tmpl->u.tex.first_layer); - surface->pitch = tex->fb_state.pitch[level]; - surface->format = tex->fb_state.format; + r300_texture_setup_fb_state(surface); /* Parameters for the CBZB clear. */ - surface->cbzb_allowed = tex->desc.cbzb_allowed[level]; + surface->cbzb_allowed = tex->tex.cbzb_allowed[level]; surface->cbzb_width = align(surface->base.width, 64); /* Height must be aligned to the size of a tile. 
*/ - tile_height = r300_get_pixel_alignment(tex->desc.b.b.format, - tex->desc.b.b.nr_samples, - tex->desc.microtile, - tex->desc.macrotile[level], - DIM_HEIGHT); + tile_height = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], + DIM_HEIGHT, 0); surface->cbzb_height = align((surface->base.height + 1) / 2, tile_height); @@ -907,7 +941,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, /* Offset must be aligned to 2K and must point at the beginning * of a scanline. */ offset = surface->offset + - tex->desc.stride_in_bytes[level] * surface->cbzb_height; + tex->tex.stride_in_bytes[level] * surface->cbzb_height; surface->cbzb_midpoint_offset = offset & ~2047; surface->cbzb_pitch = surface->pitch & 0x1ffffc; @@ -922,8 +956,8 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, surface->cbzb_allowed ? "YES" : " NO", surface->cbzb_width, surface->cbzb_height, offset & 2047, - tex->desc.microtile ? "YES" : " NO", - tex->desc.macrotile[level] ? "YES" : " NO"); + tex->tex.microtile ? "YES" : " NO", + tex->tex.macrotile[level] ? 
"YES" : " NO"); } return &surface->base; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 0ab22f747e..158a387478 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -32,7 +32,7 @@ struct pipe_resource; struct winsys_handle; struct r300_texture_format_state; struct r300_texture_desc; -struct r300_texture; +struct r300_resource; struct r300_screen; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, @@ -46,9 +46,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, uint32_t r500_tx_format_msb_bit(enum pipe_format format); -void r300_texture_reinterpret_format(struct pipe_screen *screen, +boolean r300_resource_set_properties(struct pipe_screen *screen, struct pipe_resource *tex, - enum pipe_format new_format); + unsigned offset, + const struct pipe_resource *new_properties); boolean r300_is_colorbuffer_format_supported(enum pipe_format format); @@ -57,10 +58,14 @@ boolean r300_is_zs_format_supported(enum pipe_format format); boolean r300_is_sampler_format_supported(enum pipe_format format); void r300_texture_setup_format_state(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level, struct r300_texture_format_state *out); +boolean r300_resource_get_handle(struct pipe_screen* screen, + struct pipe_resource *texture, + struct winsys_handle *whandle); + struct pipe_resource* r300_texture_from_handle(struct pipe_screen* screen, const struct pipe_resource* base, diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index aa82c47151..2910666dd5 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -34,7 +34,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, unsigned num_samples, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, - enum r300_dim dim) + enum 
r300_dim dim, boolean is_rs690) { static const unsigned table[2][5][3][2] = { @@ -57,6 +57,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ } }; + static const unsigned aa_block[2] = {4, 8}; unsigned tile = 0; unsigned pixsize = util_format_get_blocksize(format); @@ -74,6 +75,14 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, } else { /* Standard alignment. */ tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; + if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) { + int align; + int h_tile; + h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT]; + align = 64 / (pixsize * h_tile); + if (tile < align) + tile = align; + } } assert(tile); @@ -81,19 +90,19 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, } /* Return true if macrotiling should be enabled on the miplevel. */ -static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, +static boolean r300_texture_macro_switch(struct r300_resource *tex, unsigned level, boolean rv350_mode, enum r300_dim dim) { unsigned tile, texdim; - tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, - desc->microtile, R300_BUFFER_TILED, dim); + tile = r300_get_pixel_alignment(tex->b.b.b.format, tex->b.b.b.nr_samples, + tex->tex.microtile, R300_BUFFER_TILED, dim, 0); if (dim == DIM_WIDTH) { - texdim = u_minify(desc->width0, level); + texdim = u_minify(tex->tex.width0, level); } else { - texdim = u_minify(desc->height0, level); + texdim = u_minify(tex->tex.height0, level); } /* See TX_FILTER1_n.MACRO_SWITCH. */ @@ -109,91 +118,70 @@ static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, * at the given level. 
*/ static unsigned r300_texture_get_stride(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level) { unsigned tile_width, width, stride; + boolean is_rs690 = (screen->caps.family == CHIP_FAMILY_RS600 || + screen->caps.family == CHIP_FAMILY_RS690 || + screen->caps.family == CHIP_FAMILY_RS740); - if (desc->stride_in_bytes_override) - return desc->stride_in_bytes_override; + if (tex->tex.stride_in_bytes_override) + return tex->tex.stride_in_bytes_override; /* Check the level. */ - if (level > desc->b.b.last_level) { + if (level > tex->b.b.b.last_level) { SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", - __FUNCTION__, level, desc->b.b.last_level); + __FUNCTION__, level, tex->b.b.b.last_level); return 0; } - width = u_minify(desc->width0, level); + width = u_minify(tex->tex.width0, level); - if (util_format_is_plain(desc->b.b.format)) { - tile_width = r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], - DIM_WIDTH); + if (util_format_is_plain(tex->b.b.b.format)) { + tile_width = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], + DIM_WIDTH, is_rs690); width = align(width, tile_width); - stride = util_format_get_stride(desc->b.b.format, width); - - /* Some IGPs need a minimum stride of 64 bytes, hmm... */ - if (!desc->macrotile[level] && - (screen->caps.family == CHIP_FAMILY_RS600 || - screen->caps.family == CHIP_FAMILY_RS690 || - screen->caps.family == CHIP_FAMILY_RS740)) { - unsigned min_stride; - - if (desc->microtile) { - unsigned tile_height = - r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], - DIM_HEIGHT); - - min_stride = 64 / tile_height; - } else { - min_stride = 64; - } - - return stride < min_stride ? 
min_stride : stride; - } - + stride = util_format_get_stride(tex->b.b.b.format, width); /* The alignment to 32 bytes is sort of implied by the layout... */ return stride; } else { - return align(util_format_get_stride(desc->b.b.format, width), 32); + return align(util_format_get_stride(tex->b.b.b.format, width), is_rs690 ? 64 : 32); } } -static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, +static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, unsigned level, boolean *out_aligned_for_cbzb) { unsigned height, tile_height; - height = u_minify(desc->height0, level); + height = u_minify(tex->tex.height0, level); - if (util_format_is_plain(desc->b.b.format)) { - tile_height = r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], - DIM_HEIGHT); - height = align(height, tile_height); + /* Mipmapped and 3D textures must have their height aligned to POT. */ + if ((tex->b.b.b.target != PIPE_TEXTURE_1D && + tex->b.b.b.target != PIPE_TEXTURE_2D && + tex->b.b.b.target != PIPE_TEXTURE_RECT) || + tex->b.b.b.last_level != 0) { + height = util_next_power_of_two(height); + } - /* This is needed for the kernel checker, unfortunately. */ - if ((desc->b.b.target != PIPE_TEXTURE_1D && - desc->b.b.target != PIPE_TEXTURE_2D && - desc->b.b.target != PIPE_TEXTURE_RECT) || - desc->b.b.last_level != 0) { - height = util_next_power_of_two(height); - } + if (util_format_is_plain(tex->b.b.b.format)) { + tile_height = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], + DIM_HEIGHT, 0); + height = align(height, tile_height); /* See if the CBZB clear can be used on the buffer, * taking the texture size into account. 
*/ if (out_aligned_for_cbzb) { - if (desc->macrotile[level]) { + if (tex->tex.macrotile[level]) { /* When clearing, the layer (width*height) is horizontally split * into two, and the upper and lower halves are cleared by the CB * and ZB units, respectively. Therefore, the number of macrotiles @@ -201,10 +189,10 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, /* Align the height so that there is an even number of macrotiles. * Do so for 3 or more macrotiles in the Y direction. */ - if (level == 0 && desc->b.b.last_level == 0 && - (desc->b.b.target == PIPE_TEXTURE_1D || - desc->b.b.target == PIPE_TEXTURE_2D || - desc->b.b.target == PIPE_TEXTURE_RECT) && + if (level == 0 && tex->b.b.b.last_level == 0 && + (tex->b.b.b.target == PIPE_TEXTURE_1D || + tex->b.b.b.target == PIPE_TEXTURE_2D || + tex->b.b.b.target == PIPE_TEXTURE_RECT) && height >= tile_height * 3) { height = align(height, tile_height * 2); } @@ -216,30 +204,7 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, } } - return util_format_get_nblocksy(desc->b.b.format, height); -} - -static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_texture_desc *desc) -{ - /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures - * incorrectly. This is a workaround to prevent CS from being rejected. */ - - unsigned i, size; - - if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - desc->b.b.target == PIPE_TEXTURE_3D && - desc->b.b.last_level > 0) { - size = 0; - - for (i = 0; i <= desc->b.b.last_level; i++) { - size += desc->stride_in_bytes[i] * - r300_texture_get_nblocksy(desc, i, FALSE); - } - - size *= desc->depth0; - desc->size_in_bytes = size; - } + return util_format_get_nblocksy(tex->b.b.b.format, height); } /* Get a width in pixels from a stride in bytes. 
*/ @@ -251,15 +216,15 @@ static unsigned stride_to_width(enum pipe_format format, } static void r300_setup_miptree(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, boolean align_for_cbzb) { - struct pipe_resource *base = &desc->b.b; + struct pipe_resource *base = &tex->b.b.b; unsigned stride, size, layer_size, nblocksy, i; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean aligned_for_cbzb; - desc->size_in_bytes = 0; + tex->tex.size_in_bytes = 0; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Making miptree for texture, format %s\n", @@ -267,21 +232,21 @@ static void r300_setup_miptree(struct r300_screen *screen, for (i = 0; i <= base->last_level; i++) { /* Let's see if this miplevel can be macrotiled. */ - desc->macrotile[i] = - (desc->macrotile[0] == R300_BUFFER_TILED && - r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ? + tex->tex.macrotile[i] = + (tex->tex.macrotile[0] == R300_BUFFER_TILED && + r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? R300_BUFFER_TILED : R300_BUFFER_LINEAR; - stride = r300_texture_get_stride(screen, desc, i); + stride = r300_texture_get_stride(screen, tex, i); /* Compute the number of blocks in Y, see if the CBZB clear can be * used on the texture. 
*/ aligned_for_cbzb = FALSE; - if (align_for_cbzb && desc->cbzb_allowed[i]) - nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb); + if (align_for_cbzb && tex->tex.cbzb_allowed[i]) + nblocksy = r300_texture_get_nblocksy(tex, i, &aligned_for_cbzb); else - nblocksy = r300_texture_get_nblocksy(desc, i, NULL); + nblocksy = r300_texture_get_nblocksy(tex, i, NULL); layer_size = stride * nblocksy; @@ -292,75 +257,182 @@ static void r300_setup_miptree(struct r300_screen *screen, if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else - size = layer_size * u_minify(desc->depth0, i); + size = layer_size * u_minify(tex->tex.depth0, i); - desc->offset_in_bytes[i] = desc->size_in_bytes; - desc->size_in_bytes = desc->offset_in_bytes[i] + size; - desc->layer_size_in_bytes[i] = layer_size; - desc->stride_in_bytes[i] = stride; - desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); - desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb; + tex->tex.offset_in_bytes[i] = tex->tex.size_in_bytes; + tex->tex.size_in_bytes = tex->tex.offset_in_bytes[i] + size; + tex->tex.layer_size_in_bytes[i] = layer_size; + tex->tex.stride_in_bytes[i] = stride; + tex->tex.stride_in_pixels[i] = stride_to_width(tex->b.b.b.format, stride); + tex->tex.cbzb_allowed[i] = tex->tex.cbzb_allowed[i] && aligned_for_cbzb; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(desc->width0, i), u_minify(desc->height0, i), - u_minify(desc->depth0, i), stride, desc->size_in_bytes, - desc->macrotile[i] ? "TRUE" : "FALSE"); + i, u_minify(tex->tex.width0, i), u_minify(tex->tex.height0, i), + u_minify(tex->tex.depth0, i), stride, tex->tex.size_in_bytes, + tex->tex.macrotile[i] ? 
"TRUE" : "FALSE"); } } -static void r300_setup_flags(struct r300_texture_desc *desc) +static void r300_setup_flags(struct r300_resource *tex) { - desc->uses_stride_addressing = - !util_is_power_of_two(desc->b.b.width0) || - (desc->stride_in_bytes_override && - stride_to_width(desc->b.b.format, - desc->stride_in_bytes_override) != desc->b.b.width0); - - desc->is_npot = - desc->uses_stride_addressing || - !util_is_power_of_two(desc->b.b.height0) || - !util_is_power_of_two(desc->b.b.depth0); + tex->tex.uses_stride_addressing = + !util_is_power_of_two(tex->b.b.b.width0) || + (tex->tex.stride_in_bytes_override && + stride_to_width(tex->b.b.b.format, + tex->tex.stride_in_bytes_override) != tex->b.b.b.width0); + + tex->tex.is_npot = + tex->tex.uses_stride_addressing || + !util_is_power_of_two(tex->b.b.b.height0) || + !util_is_power_of_two(tex->b.b.b.depth0); } static void r300_setup_cbzb_flags(struct r300_screen *rscreen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { unsigned i, bpp; boolean first_level_valid; - bpp = util_format_get_blocksizebits(desc->b.b.format); + bpp = util_format_get_blocksizebits(tex->b.b.b.format); /* 1) The texture must be point-sampled, * 2) The depth must be 16 or 32 bits. * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage * with certain texture sizes. Macrotiling ensures the alignment. 
*/ - first_level_valid = desc->b.b.nr_samples <= 1 && + first_level_valid = tex->b.b.b.nr_samples <= 1 && (bpp == 16 || bpp == 32) && - desc->macrotile[0]; + tex->tex.macrotile[0]; if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB)) first_level_valid = FALSE; - for (i = 0; i <= desc->b.b.last_level; i++) - desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; + for (i = 0; i <= tex->b.b.b.last_level; i++) + tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i]; +} + +static unsigned r300_pixels_to_dwords(unsigned stride, + unsigned height, + unsigned xblock, unsigned yblock) +{ + return (util_align_npot(stride, xblock) * align(height, yblock)) / (xblock * yblock); +} + +static void r300_setup_hyperz_properties(struct r300_screen *screen, + struct r300_resource *tex) +{ + /* The tile size of 1 DWORD in ZMASK RAM is: + * + * GPU Pipes 4x4 mode 8x8 mode + * ------------------------------------------ + * R580 4P/1Z 32x32 64x64 + * RV570 3P/1Z 48x16 96x32 + * RV530 1P/2Z 32x16 64x32 + * 1P/1Z 16x16 32x32 + */ + static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8}; + + /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels), + * but the blocks have very weird ordering. + * + * With 2 pipes and an image of size 8xY, where Y >= 1, + * clearing 4 dwords clears blocks like this: + * + * 01012323 + * + * where numbers correspond to dword indices. The blocks are interleaved + * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels). + * + * With 4 pipes and an image of size 8xY, where Y >= 4, + * clearing 8 dwords clears blocks like this: + * 01012323 + * 45456767 + * 01012323 + * 45456767 + * where numbers correspond to dword indices. 
The blocks are interleaved + * in both directions, so the alignment must be 4x4 blocks (32x32 pixels) + */ + static unsigned hiz_align_x[4] = {8, 32, 48, 32}; + static unsigned hiz_align_y[4] = {8, 8, 8, 32}; + + if (util_format_is_depth_or_stencil(tex->b.b.b.format) && + util_format_get_blocksizebits(tex->b.b.b.format) == 32 && + tex->tex.microtile) { + unsigned i, pipes; + + if (screen->caps.family == CHIP_FAMILY_RV530) { + pipes = screen->caps.num_z_pipes; + } else { + pipes = screen->caps.num_frag_pipes; + } + + for (i = 0; i <= tex->b.b.b.last_level; i++) { + unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height; + + stride = align(tex->tex.stride_in_pixels[i], 16); + height = u_minify(tex->b.b.b.height0, i); + + /* The 8x8 compression mode needs macrotiling. */ + zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 && + tex->tex.macrotile[i] && + tex->b.b.b.nr_samples <= 1 ? 8 : 4; + + /* Get the ZMASK buffer size in dwords. */ + zcomp_numdw = r300_pixels_to_dwords(stride, height, + zmask_blocks_x_per_dw[pipes-1] * zcompsize, + zmask_blocks_y_per_dw[pipes-1] * zcompsize); + + /* Check whether we have enough ZMASK memory. */ + if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 && + zcomp_numdw <= screen->caps.zmask_ram * pipes) { + tex->tex.zmask_dwords[i] = zcomp_numdw; + tex->tex.zcomp8x8[i] = zcompsize == 8; + + tex->tex.zmask_stride_in_pixels[i] = + util_align_npot(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize); + } else { + tex->tex.zmask_dwords[i] = 0; + tex->tex.zcomp8x8[i] = FALSE; + tex->tex.zmask_stride_in_pixels[i] = 0; + } + + /* Now setup HIZ. */ + stride = util_align_npot(stride, hiz_align_x[pipes-1]); + height = align(height, hiz_align_y[pipes-1]); + + /* Get the HIZ buffer size in dwords. */ + hiz_numdw = (stride * height) / (8*8 * pipes); + + /* Check whether we have enough HIZ memory. 
*/ + if (hiz_numdw <= screen->caps.hiz_ram * pipes) { + tex->tex.hiz_dwords[i] = hiz_numdw; + tex->tex.hiz_stride_in_pixels[i] = stride; + } else { + tex->tex.hiz_dwords[i] = 0; + tex->tex.hiz_stride_in_pixels[i] = 0; + } + } + } } static void r300_setup_tiling(struct r300_screen *screen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { - struct r300_winsys_screen *rws = screen->rws; - enum pipe_format format = desc->b.b.format; + enum pipe_format format = tex->b.b.b.format; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean is_zb = util_format_is_depth_or_stencil(format); boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); + tex->tex.microtile = R300_BUFFER_LINEAR; + tex->tex.macrotile[0] = R300_BUFFER_LINEAR; + if (!util_format_is_plain(format)) { return; } /* If height == 1, disable microtiling except for zbuffer. */ - if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) { + if (!is_zb && (tex->b.b.b.height0 == 1 || dbg_no_tiling)) { return; } @@ -369,13 +441,11 @@ static void r300_setup_tiling(struct r300_screen *screen, case 1: case 4: case 8: - desc->microtile = R300_BUFFER_TILED; + tex->tex.microtile = R300_BUFFER_TILED; break; case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - desc->microtile = R300_BUFFER_SQUARETILED; - } + tex->tex.microtile = R300_BUFFER_SQUARETILED; break; } @@ -384,104 +454,99 @@ static void r300_setup_tiling(struct r300_screen *screen, } /* Set macrotiling. 
*/ - if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) { - desc->macrotile[0] = R300_BUFFER_TILED; + if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { + tex->tex.macrotile[0] = R300_BUFFER_TILED; } } -static void r300_tex_print_info(struct r300_screen *rscreen, - struct r300_texture_desc *desc, +static void r300_tex_print_info(struct r300_resource *tex, const char *func) { fprintf(stderr, "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " "LastLevel: %i, Size: %i, Format: %s\n", func, - desc->macrotile[0] ? "YES" : " NO", - desc->microtile ? "YES" : " NO", - desc->stride_in_pixels[0], - desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0, - desc->b.b.last_level, desc->size_in_bytes, - util_format_short_name(desc->b.b.format)); + tex->tex.macrotile[0] ? "YES" : " NO", + tex->tex.microtile ? "YES" : " NO", + tex->tex.stride_in_pixels[0], + tex->b.b.b.width0, tex->b.b.b.height0, tex->b.b.b.depth0, + tex->b.b.b.last_level, tex->tex.size_in_bytes, + util_format_short_name(tex->b.b.b.format)); } boolean r300_texture_desc_init(struct r300_screen *rscreen, - struct r300_texture_desc *desc, - const struct pipe_resource *base, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, - unsigned stride_in_bytes_override, - unsigned max_buffer_size) + struct r300_resource *tex, + const struct pipe_resource *base) { - desc->b.b = *base; - desc->b.b.screen = &rscreen->screen; - desc->stride_in_bytes_override = stride_in_bytes_override; - desc->width0 = base->width0; - desc->height0 = base->height0; - desc->depth0 = base->depth0; - - r300_setup_flags(desc); + tex->b.b.b.target = base->target; + tex->b.b.b.format = base->format; + tex->b.b.b.width0 = base->width0; + tex->b.b.b.height0 = base->height0; + tex->b.b.b.depth0 = base->depth0; + tex->b.b.b.array_size = base->array_size; + 
tex->b.b.b.last_level = base->last_level; + tex->b.b.b.nr_samples = base->nr_samples; + tex->tex.width0 = base->width0; + tex->tex.height0 = base->height0; + tex->tex.depth0 = base->depth0; + + r300_setup_flags(tex); /* Align a 3D NPOT texture to POT. */ - if (base->target == PIPE_TEXTURE_3D && desc->is_npot) { - desc->width0 = util_next_power_of_two(desc->width0); - desc->height0 = util_next_power_of_two(desc->height0); - desc->depth0 = util_next_power_of_two(desc->depth0); + if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) { + tex->tex.width0 = util_next_power_of_two(tex->tex.width0); + tex->tex.height0 = util_next_power_of_two(tex->tex.height0); + tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0); } /* Setup tiling. */ - if (microtile == R300_BUFFER_SELECT_LAYOUT || - macrotile == R300_BUFFER_SELECT_LAYOUT) { - r300_setup_tiling(rscreen, desc); - } else { - desc->microtile = microtile; - desc->macrotile[0] = macrotile; - assert(desc->b.b.last_level == 0); + if (tex->tex.microtile == R300_BUFFER_SELECT_LAYOUT) { + r300_setup_tiling(rscreen, tex); } - r300_setup_cbzb_flags(rscreen, desc); + r300_setup_cbzb_flags(rscreen, tex); /* Setup the miptree description. */ - r300_setup_miptree(rscreen, desc, TRUE); + r300_setup_miptree(rscreen, tex, TRUE); /* If the required buffer size is larger the given max size, * try again without the alignment for the CBZB clear. */ - if (max_buffer_size && desc->size_in_bytes > max_buffer_size) { - r300_setup_miptree(rscreen, desc, FALSE); + if (tex->buf_size && tex->tex.size_in_bytes > tex->buf_size) { + r300_setup_miptree(rscreen, tex, FALSE); } - r300_texture_3d_fix_mipmapping(rscreen, desc); + r300_setup_hyperz_properties(rscreen, tex); - if (max_buffer_size) { + if (tex->buf_size) { /* Make sure the buffer we got is large enough. */ - if (desc->size_in_bytes > max_buffer_size) { + if (tex->tex.size_in_bytes > tex->buf_size) { fprintf(stderr, "r300: texture_desc_init: The buffer is not " "large enough. 
Got: %i, Need: %i, Info:\n", - max_buffer_size, desc->size_in_bytes); - r300_tex_print_info(rscreen, desc, "texture_desc_init"); + tex->buf_size, tex->tex.size_in_bytes); + r300_tex_print_info(tex, "texture_desc_init"); return FALSE; } - desc->buffer_size_in_bytes = max_buffer_size; + tex->tex.buffer_size_in_bytes = tex->buf_size; } else { - desc->buffer_size_in_bytes = desc->size_in_bytes; + tex->tex.buffer_size_in_bytes = tex->tex.size_in_bytes; } if (SCREEN_DBG_ON(rscreen, DBG_TEX)) - r300_tex_print_info(rscreen, desc, "texture_desc_init"); + r300_tex_print_info(tex, "texture_desc_init"); return TRUE; } -unsigned r300_texture_get_offset(struct r300_texture_desc *desc, +unsigned r300_texture_get_offset(struct r300_resource *tex, unsigned level, unsigned layer) { - unsigned offset = desc->offset_in_bytes[level]; + unsigned offset = tex->tex.offset_in_bytes[level]; - switch (desc->b.b.target) { + switch (tex->b.b.b.target) { case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: - return offset + layer * desc->layer_size_in_bytes[level]; + return offset + layer * tex->tex.layer_size_in_bytes[level]; default: assert(layer == 0); diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h index 44d88794a1..ce6e9643ec 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.h +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -30,7 +30,7 @@ struct pipe_resource; struct r300_screen; struct r300_texture_desc; -struct r300_texture; +struct r300_resource; enum r300_dim { DIM_WIDTH = 0, @@ -41,17 +41,13 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, unsigned num_samples, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, - enum r300_dim dim); + enum r300_dim dim, boolean is_rs690); boolean r300_texture_desc_init(struct r300_screen *rscreen, - struct r300_texture_desc *desc, - const struct pipe_resource *base, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, - unsigned 
stride_in_bytes_override, - unsigned max_buffer_size); - -unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + struct r300_resource *tex, + const struct pipe_resource *base); + +unsigned r300_texture_get_offset(struct r300_resource *tex, unsigned level, unsigned layer); #endif diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 15a323989b..97ec0a1a1f 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -191,7 +191,12 @@ static void transform_dstreg( dst->File = translate_register_file(src->Register.File); dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); dst->WriteMask = src->Register.WriteMask; - dst->RelAddr = src->Register.Indirect; + + if (src->Register.Indirect) { + ttr->error = TRUE; + fprintf(stderr, "r300: Relative addressing of destination operands " + "is unsupported.\n"); + } } static void transform_srcreg( @@ -332,6 +337,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, unsigned imm_index = 0; int i; + ttr->error = FALSE; + /* Allocate constants placeholders. * * Note: What if declared constants are not contiguous? */ diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h index 97641a954b..adb044cfe5 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -47,6 +47,9 @@ struct tgsi_to_rc { /* Vertex shaders have no half swizzles, and no way to handle them, so * until rc grows proper support, indicate if they're safe to use. */ boolean use_half_swizzles; + + /* If an error occured. 
*/ + boolean error; }; void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 3b95af79bc..65c5095be6 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -37,7 +37,7 @@ struct r300_transfer { unsigned offset; /* Linear texture. */ - struct r300_texture *linear_texture; + struct r300_resource *linear_texture; }; /* Convenience cast wrapper. */ @@ -54,7 +54,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; struct pipe_resource *tex = transfer->resource; - ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, 0, + ctx->resource_copy_region(ctx, &r300transfer->linear_texture->b.b.b, 0, 0, 0, 0, tex, transfer->level, &transfer->box); } @@ -70,9 +70,10 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, ctx->resource_copy_region(ctx, tex, transfer->level, transfer->box.x, transfer->box.y, transfer->box.z, - &r300transfer->linear_texture->desc.b.b, 0, &src_box); + &r300transfer->linear_texture->b.b.b, 0, &src_box); - ctx->flush(ctx, 0, NULL); + /* XXX remove this. 
*/ + r300_flush(ctx, 0, NULL); } struct pipe_transfer* @@ -83,25 +84,25 @@ r300_texture_get_transfer(struct pipe_context *ctx, const struct pipe_box *box) { struct r300_context *r300 = r300_context(ctx); - struct r300_texture *tex = r300_texture(texture); + struct r300_resource *tex = r300_resource(texture); struct r300_transfer *trans; struct pipe_resource base; boolean referenced_cs, referenced_hw, blittable; + const struct util_format_description *desc = + util_format_description(texture->format); referenced_cs = - r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_CS); + r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf); if (referenced_cs) { referenced_hw = TRUE; } else { referenced_hw = - r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_HW); + r300->rws->buffer_is_busy(tex->buf); } - blittable = ctx->screen->is_format_supported( - ctx->screen, texture->format, texture->target, 0, - PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, 0); + blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || + desc->layout == UTIL_FORMAT_LAYOUT_S3TC || + desc->layout == UTIL_FORMAT_LAYOUT_RGTC; trans = CALLOC_STRUCT(r300_transfer); if (trans) { @@ -114,13 +115,17 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* If the texture is tiled, we must create a temporary detiled texture * for this transfer. * Also make write transfers pipelined. 
*/ - if (tex->desc.microtile || tex->desc.macrotile[level] || - ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { + if (tex->tex.microtile || tex->tex.macrotile[level] || + (referenced_hw && blittable && !(usage & PIPE_TRANSFER_READ))) { + if (r300->blitter->running) { + fprintf(stderr, "r300: ERROR: Blitter recursion in texture_get_transfer.\n"); + os_break(); + } + base.target = PIPE_TEXTURE_2D; base.format = texture->format; base.width0 = box->width; base.height0 = box->height; - /* XXX: was depth0 = 0 */ base.depth0 = 1; base.array_size = 1; base.last_level = 0; @@ -141,23 +146,23 @@ r300_texture_get_transfer(struct pipe_context *ctx, } /* Create the temporary texture. */ - trans->linear_texture = r300_texture( + trans->linear_texture = r300_resource( ctx->screen->resource_create(ctx->screen, &base)); if (!trans->linear_texture) { /* Oh crap, the thing can't create the texture. * Let's flush and try again. */ - ctx->flush(ctx, 0, NULL); + r300_flush(ctx, 0, NULL); - trans->linear_texture = r300_texture( + trans->linear_texture = r300_resource( ctx->screen->resource_create(ctx->screen, &base)); if (!trans->linear_texture) { /* For linear textures, it's safe to fallback to * an unpipelined transfer. */ - if (!tex->desc.microtile && !tex->desc.macrotile[level]) { + if (!tex->tex.microtile && !tex->tex.macrotile[level]) { goto unpipelined; } @@ -169,18 +174,12 @@ r300_texture_get_transfer(struct pipe_context *ctx, } } - assert(!trans->linear_texture->desc.microtile && - !trans->linear_texture->desc.macrotile[0]); + assert(!trans->linear_texture->tex.microtile && + !trans->linear_texture->tex.macrotile[0]); - /* Set the stride. - * - * Even though we are using an internal texture for this, - * the transfer level, box and usage parameters still reflect - * the arguments received to get_transfer. We just do the - * right thing internally. - */ + /* Set the stride. 
*/ trans->transfer.stride = - trans->linear_texture->desc.stride_in_bytes[0]; + trans->linear_texture->tex.stride_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is @@ -188,18 +187,19 @@ r300_texture_get_transfer(struct pipe_context *ctx, r300_copy_from_tiled_texture(ctx, trans); /* Always referenced in the blit. */ - ctx->flush(ctx, 0, NULL); + r300_flush(ctx, 0, NULL); } return &trans->transfer; } unpipelined: /* Unpipelined transfer. */ - trans->transfer.stride = tex->desc.stride_in_bytes[level]; - trans->offset = r300_texture_get_offset(&tex->desc, level, box->z); + trans->transfer.stride = tex->tex.stride_in_bytes[level]; + trans->offset = r300_texture_get_offset(tex, level, box->z); - if (referenced_cs) - ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); + if (referenced_cs && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) + r300_flush(ctx, 0, NULL); return &trans->transfer; } return NULL; @@ -228,20 +228,19 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, struct r300_context *r300 = r300_context(ctx); struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); - struct r300_texture *tex = r300_texture(transfer->resource); + struct r300_resource *tex = r300_resource(transfer->resource); char *map; - enum pipe_format format = tex->desc.b.b.format; + enum pipe_format format = tex->b.b.b.format; if (r300transfer->linear_texture) { /* The detiled texture is of the same size as the region being mapped * (no offset needed). */ - return rws->buffer_map(rws, - r300transfer->linear_texture->buffer, + return rws->buffer_map(r300transfer->linear_texture->buf, r300->cs, transfer->usage); } else { /* Tiling is disabled. 
*/ - map = rws->buffer_map(rws, tex->buffer, r300->cs, + map = rws->buffer_map(tex->buf, r300->cs, transfer->usage); if (!map) { @@ -259,11 +258,11 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, { struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); - struct r300_texture *tex = r300_texture(transfer->resource); + struct r300_resource *tex = r300_resource(transfer->resource); if (r300transfer->linear_texture) { - rws->buffer_unmap(rws, r300transfer->linear_texture->buffer); + rws->buffer_unmap(r300transfer->linear_texture->buf); } else { - rws->buffer_unmap(rws, tex->buffer); + rws->buffer_unmap(tex->buf); } } diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 78021e2c5d..b319890157 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -226,6 +226,13 @@ void r300_translate_vertex_shader(struct r300_context *r300, r300_tgsi_to_rc(&ttr, vs->state.tokens); + if (ttr.error) { + fprintf(stderr, "r300 VP: Cannot translate a shader. " + "Using a dummy shader instead.\n"); + r300_dummy_vertex_shader(r300, vs); + return; + } + if (compiler.Base.Program.Constants.Count > 200) { compiler.Base.remove_unused_constants = TRUE; } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 0dd330d101..3a6798a542 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -28,37 +28,44 @@ * Any winsys hosting this pipe needs to implement r300_winsys_screen and then * call r300_screen_create to start things. */ +#include "r300_defines.h" + +#include "pipebuffer/pb_bufmgr.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "r300_defines.h" - #define R300_MAX_CMDBUF_DWORDS (16 * 1024) +#define R300_FLUSH_ASYNC (1 << 0) struct winsys_handle; struct r300_winsys_screen; -struct r300_winsys_buffer; /* for map/unmap etc. 
*/ -struct r300_winsys_cs_buffer; /* for write_reloc etc. */ +#define r300_winsys_bo pb_buffer +#define r300_winsys_bo_reference(pdst, src) pb_reference(pdst, src) + +struct r300_winsys_cs_handle; /* for write_reloc etc. */ struct r300_winsys_cs { - unsigned cdw; /* Number of used dwords. */ - uint32_t *buf; /* The command buffer. */ + unsigned cdw; /* Number of used dwords. */ + uint32_t *buf; /* The command buffer. */ }; enum r300_value_id { R300_VID_PCI_ID, R300_VID_GB_PIPES, R300_VID_Z_PIPES, - R300_VID_SQUARE_TILING_SUPPORT, - R300_VID_DRM_2_3_0, - R300_VID_DRM_2_6_0, - R300_CAN_HYPERZ, -}; - -enum r300_reference_domain { /* bitfield */ - R300_REF_CS = 1, - R300_REF_HW = 2 + R300_VID_GART_SIZE, + R300_VID_VRAM_SIZE, + R300_VID_DRM_MAJOR, + R300_VID_DRM_MINOR, + R300_VID_DRM_PATCHLEVEL, + + /* These should probably go away: */ + R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ + R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask, R16F/RG16F */ + + R300_CAN_HYPERZ, /* ZMask + HiZ */ + R300_CAN_AACOMPRESS, /* CMask */ }; struct r300_winsys_screen { @@ -97,91 +104,78 @@ struct r300_winsys_screen { * \param domain A bitmask of the R300_DOMAIN_* flags. * \return The created buffer object. */ - struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws, + struct r300_winsys_bo *(*buffer_create)(struct r300_winsys_screen *ws, unsigned size, unsigned alignment, unsigned bind, unsigned usage, enum r300_buffer_domain domain); - struct r300_winsys_cs_buffer *(*buffer_get_cs_handle)( - struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); - - /** - * Reference a buffer object (assign with reference counting). - * - * \param ws The winsys this function is called from. - * \param pdst A destination pointer to set the source buffer to. - * \param src A source buffer object. 
- */ - void (*buffer_reference)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer **pdst, - struct r300_winsys_buffer *src); + struct r300_winsys_cs_handle *(*buffer_get_cs_handle)( + struct r300_winsys_bo *buf); /** * Map the entire data store of a buffer object into the client's address * space. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to map. * \param cs A command stream to flush if the buffer is referenced by it. * \param usage A bitmask of the PIPE_TRANSFER_* flags. * \return The pointer at the beginning of the buffer. */ - void *(*buffer_map)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void *(*buffer_map)(struct r300_winsys_bo *buf, struct r300_winsys_cs *cs, enum pipe_transfer_usage usage); /** * Unmap a buffer object from the client's address space. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to unmap. */ - void (*buffer_unmap)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); + void (*buffer_unmap)(struct r300_winsys_bo *buf); + + /** + * Return TRUE if a buffer object is being used by the GPU. + * + * \param buf A winsys buffer object. + */ + boolean (*buffer_is_busy)(struct r300_winsys_bo *buf); /** * Wait for a buffer object until it is not used by a GPU. This is * equivalent to a fence placed after the last command using the buffer, * and synchronizing to the fence. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to wait for. */ - void (*buffer_wait)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); + void (*buffer_wait)(struct r300_winsys_bo *buf); /** * Return tiling flags describing a memory layout of a buffer object. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to get the flags from. * \param macrotile A pointer to the return value of the microtile flag. 
* \param microtile A pointer to the return value of the macrotile flag. * * \note microtile and macrotile are not bitmasks! */ - void (*buffer_get_tiling)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void (*buffer_get_tiling)(struct r300_winsys_bo *buf, enum r300_buffer_tiling *microtile, enum r300_buffer_tiling *macrotile); /** * Set tiling flags describing a memory layout of a buffer object. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to set the flags for. + * \param cs A command stream to flush if the buffer is referenced by it. * \param macrotile A macrotile flag. * \param microtile A microtile flag. * \param stride A stride of the buffer in bytes, for texturing. * * \note microtile and macrotile are not bitmasks! */ - void (*buffer_set_tiling)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void (*buffer_set_tiling)(struct r300_winsys_bo *buf, + struct r300_winsys_cs *cs, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, unsigned stride); @@ -196,7 +190,7 @@ struct r300_winsys_screen { * \param stride The returned buffer stride in bytes. * \param size The returned buffer size. */ - struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws, + struct r300_winsys_bo *(*buffer_from_handle)(struct r300_winsys_screen *ws, struct winsys_handle *whandle, unsigned *stride, unsigned *size); @@ -205,14 +199,12 @@ struct r300_winsys_screen { * Get a winsys handle from a winsys buffer. The internal structure * of the handle is platform-specific and only a winsys should access it. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to get the handle from. * \param whandle A winsys handle pointer. * \param stride A stride of the buffer in bytes, for texturing. * \return TRUE on success. 
*/ - boolean (*buffer_get_handle)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + boolean (*buffer_get_handle)(struct r300_winsys_bo *buf, unsigned stride, struct winsys_handle *whandle); @@ -238,23 +230,22 @@ struct r300_winsys_screen { void (*cs_destroy)(struct r300_winsys_cs *cs); /** - * Add a buffer object to the list of buffers to validate. + * Add a new buffer relocation. Every relocation must first be added + * before it can be written. * - * \param cs A command stream to add buffer for validation against. - * \param buf A winsys buffer to validate. - * \param rd A read domain containing a bitmask - * of the R300_DOMAIN_* flags. - * \param wd A write domain containing a bitmask - * of the R300_DOMAIN_* flags. + * \param cs A command stream to add buffer for validation against. + * \param buf A winsys buffer to validate. + * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags. + * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ - void (*cs_add_buffer)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + void (*cs_add_reloc)(struct r300_winsys_cs *cs, + struct r300_winsys_cs_handle *buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd); /** - * Revalidate all currently set up winsys buffers. - * Returns TRUE if a flush is required. + * Return TRUE if there is enough memory in VRAM and GTT for the relocs + * added so far. * * \param cs A command stream to validate. */ @@ -269,16 +260,15 @@ struct r300_winsys_screen { * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ void (*cs_write_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + struct r300_winsys_cs_handle *buf); /** * Flush a command stream. * * \param cs A command stream to flush. + * \param flags, R300_FLUSH_ASYNC or 0. 
*/ - void (*cs_flush)(struct r300_winsys_cs *cs); + void (*cs_flush)(struct r300_winsys_cs *cs, unsigned flags); /** * Set a flush callback which is called from winsys when flush is @@ -289,28 +279,17 @@ struct r300_winsys_screen { * \param user A user pointer that will be passed to the flush callback. */ void (*cs_set_flush)(struct r300_winsys_cs *cs, - void (*flush)(void *), + void (*flush)(void *ctx, unsigned flags), void *user); /** - * Reset the list of buffer objects to validate, usually called - * prior to adding buffer objects for validation. - * - * \param cs A command stream to reset buffers for. - */ - void (*cs_reset_buffers)(struct r300_winsys_cs *cs); - - /** - * Return TRUE if a buffer is referenced by a command stream or by hardware - * (i.e. is busy), based on the domain parameter. + * Return TRUE if a buffer is referenced by a command stream. * * \param cs A command stream. * \param buf A winsys buffer. - * \param domain A bitmask of the R300_REF_* enums. */ boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_reference_domain domain); + struct r300_winsys_cs_handle *buf); }; #endif /* R300_WINSYS_H */ diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk new file mode 100644 index 0000000000..b76a78810f --- /dev/null +++ b/src/gallium/drivers/r600/Android.mk @@ -0,0 +1,43 @@ +ifeq ($(strip $(MESA_BUILD_R600G)),true) + +LOCAL_PATH := $(call my-dir) + +# from Makefile +C_SOURCES = \ + r600_asm.c \ + r600_blit.c \ + r600_buffer.c \ + r600_helper.c \ + r600_pipe.c \ + r600_query.c \ + r600_resource.c \ + r600_shader.c \ + r600_state.c \ + r600_texture.c \ + r700_asm.c \ + evergreen_state.c \ + eg_asm.c \ + r600_translate.c \ + r600_state_common.c + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(C_SOURCES) + +LOCAL_CFLAGS := \ + -std=c99 \ + -fvisibility=hidden \ + -Wno-sign-compare + +LOCAL_C_INCLUDES := \ + external/mesa/src/gallium/include \ + 
external/mesa/src/gallium/auxiliary \ + external/drm \ + external/drm/include/drm + +LOCAL_MODULE := libmesa_pipe_r600 + +include $(BUILD_STATIC_LIBRARY) + +endif # MESA_BUILD_R600G diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 3fc1fa94c2..5a5fa6d65f 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -9,7 +9,7 @@ except OSError: Return() env.Append(CPPPATH = [ - '#/include', + '#/include', '#/src/mesa', ]) diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index b79875c7c7..3793b919dd 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -35,15 +35,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) switch (cf->inst) { case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | - S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; @@ -60,7 +62,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) 
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); - bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | @@ -90,37 +93,3 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) } return 0; } - -void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) -{ - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 8 - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - (r600_bo_offset(ve->fetch_shader)) >> 8, - 0xFFFFFFFF, ve->fetch_shader); -} diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h 
index ecea1db4f1..cae3888051 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim) default: case PIPE_TEXTURE_1D: return V_030000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_030000_SQ_TEX_DIM_1D_ARRAY; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: return V_030000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_030000_SQ_TEX_DIM_2D_ARRAY; case PIPE_TEXTURE_3D: return V_030000_SQ_TEX_DIM_3D; case PIPE_TEXTURE_CUBE: @@ -289,10 +293,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { /* 8-bit buffers. */ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_SWAP_ALT; + case PIPE_FORMAT_A8_UNORM: return V_028C70_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_028C70_SWAP_STD; @@ -313,6 +321,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_028C70_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: return V_028C70_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_028C70_SWAP_STD; @@ -352,9 +361,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_SWAP_STD_REV; + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_028C70_SWAP_ALT; case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; @@ -362,14 +373,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return V_028C70_COLOR_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return V_028C70_COLOR_16_16_16_16_FLOAT; /* 128-bit buffers. 
*/ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return V_028C70_COLOR_32_32_32_32_FLOAT; - return 0; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_SWAP_STD; default: R600_ERR("unsupported colorswap format %d\n", format); return ~0; @@ -381,9 +391,13 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { /* 8-bit buffers. */ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_COLOR_4_4; + case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_028C70_COLOR_8; @@ -404,6 +418,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_16; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: case PIPE_FORMAT_R8G8_UNORM: return V_028C70_COLOR_8_8; @@ -430,7 +445,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_COLOR_10_10_10_2; + return V_028C70_COLOR_2_10_10_10; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: @@ -471,6 +486,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_32_32; /* 128-bit buffers. 
*/ + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_COLOR_32_32_32_32; case PIPE_FORMAT_R32G32B32_FLOAT: return V_028C70_COLOR_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -485,9 +503,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } -static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) +static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { - return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; } static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) @@ -501,144 +519,4 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) return r600_translate_dbformat(format) != ~0; } -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) -{ - return r600_translate_colorformat(format) != ~0; -} - -static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) -{ - uint32_t result = 0; - const struct util_format_description *desc; - unsigned i; - - desc = util_format_description(format); - if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { - goto out_unknown; - } - - /* Find the first non-VOID channel. 
*/ - for (i = 0; i < 4; i++) { - if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { - break; - } - } - - switch (desc->channel[i].type) { - /* Half-floats, floats, doubles */ - case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[i].size) { - case 16: - switch (desc->nr_channels) { - case 1: - result = FMT_16_FLOAT; - break; - case 2: - result = FMT_16_16_FLOAT; - break; - case 3: - result = FMT_16_16_16_FLOAT; - break; - case 4: - result = FMT_16_16_16_16_FLOAT; - break; - } - break; - case 32: - switch (desc->nr_channels) { - case 1: - result = FMT_32_FLOAT; - break; - case 2: - result = FMT_32_32_FLOAT; - break; - case 3: - result = FMT_32_32_32_FLOAT; - break; - case 4: - result = FMT_32_32_32_32_FLOAT; - break; - } - break; - default: - goto out_unknown; - } - break; - /* Unsigned ints */ - case UTIL_FORMAT_TYPE_UNSIGNED: - /* Signed ints */ - case UTIL_FORMAT_TYPE_SIGNED: - switch (desc->channel[i].size) { - case 8: - switch (desc->nr_channels) { - case 1: - result = FMT_8; - break; - case 2: - result = FMT_8_8; - break; - case 3: -// result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */ -// break; - case 4: - result = FMT_8_8_8_8; - break; - } - break; - case 16: - switch (desc->nr_channels) { - case 1: - result = FMT_16; - break; - case 2: - result = FMT_16_16; - break; - case 3: -// result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */ -// break; - case 4: - result = FMT_16_16_16_16; - break; - } - break; - case 32: - switch (desc->nr_channels) { - case 1: - result = FMT_32; - break; - case 2: - result = FMT_32_32; - break; - case 3: - result = FMT_32_32_32; - break; - case 4: - result = FMT_32_32_32_32; - break; - } - break; - default: - goto out_unknown; - } - break; - default: - goto out_unknown; - } - - result = S_030008_DATA_FORMAT(result); - - if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - result |= S_030008_FORMAT_COMP_ALL(1); - } - if (desc->channel[i].normalized) { - result |= S_030008_NUM_FORMAT_ALL(0); 
- } else { - result |= S_030008_NUM_FORMAT_ALL(2); - } - return result; -out_unknown: - R600_ERR("unsupported vertex format %s\n", util_format_name(format)); - return ~0; -} - #endif diff --git a/src/gallium/drivers/r600/eg_states_inc.h b/src/gallium/drivers/r600/eg_states_inc.h deleted file mode 100644 index 1379c11291..0000000000 --- a/src/gallium/drivers/r600/eg_states_inc.h +++ /dev/null @@ -1,458 +0,0 @@ -/* This file is autogenerated from eg_states.h - do not edit directly */ -/* autogenerating script is gen_eg_states.py */ - -/* EG_CONFIG */ -#define EG_CONFIG__SQ_CONFIG 0 -#define EG_CONFIG__SPI_CONFIG_CNTL 1 -#define EG_CONFIG__SPI_CONFIG_CNTL_1 2 -#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1 3 -#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2 4 -#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3 5 -#define EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1 6 -#define EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2 7 -#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1 8 -#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2 9 -#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3 10 -#define EG_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 11 -#define EG_CONFIG__PA_CL_ENHANCE 12 -#define EG_CONFIG__SQ_DYN_GPR_RESOURCE_LIMIT_1 13 -#define EG_CONFIG__SQ_LDS_ALLOC_PS 14 -#define EG_CONFIG__SX_MISC 15 -#define EG_CONFIG__SQ_ESGS_RING_ITEMSIZE 16 -#define EG_CONFIG__SQ_GSVS_RING_ITEMSIZE 17 -#define EG_CONFIG__SQ_ESTMP_RING_ITEMSIZE 18 -#define EG_CONFIG__SQ_GSTMP_RING_ITEMSIZE 19 -#define EG_CONFIG__SQ_VSTMP_RING_ITEMSIZE 20 -#define EG_CONFIG__SQ_PSTMP_RING_ITEMSIZE 21 -#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE 22 -#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_1 23 -#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_2 24 -#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_3 25 -#define EG_CONFIG__VGT_OUTPUT_PATH_CNTL 26 -#define EG_CONFIG__VGT_HOS_CNTL 27 -#define EG_CONFIG__VGT_HOS_MAX_TESS_LEVEL 28 -#define EG_CONFIG__VGT_HOS_MIN_TESS_LEVEL 29 -#define EG_CONFIG__VGT_HOS_REUSE_DEPTH 30 -#define EG_CONFIG__VGT_GROUP_PRIM_TYPE 31 -#define EG_CONFIG__VGT_GROUP_FIRST_DECR 
32 -#define EG_CONFIG__VGT_GROUP_DECR 33 -#define EG_CONFIG__VGT_GROUP_VECT_0_CNTL 34 -#define EG_CONFIG__VGT_GROUP_VECT_1_CNTL 35 -#define EG_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 36 -#define EG_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 37 -#define EG_CONFIG__VGT_GS_MODE 38 -#define EG_CONFIG__PA_SC_MODE_CNTL_0 39 -#define EG_CONFIG__PA_SC_MODE_CNTL_1 40 -#define EG_CONFIG__VGT_REUSE_OFF 41 -#define EG_CONFIG__VGT_VTX_CNT_EN 42 -#define EG_CONFIG__VGT_SHADER_STAGES_EN 43 -#define EG_CONFIG__VGT_STRMOUT_CONFIG 44 -#define EG_CONFIG__VGT_STRMOUT_BUFFER_CONFIG 45 -#define EG_CONFIG_SIZE 46 -#define EG_CONFIG_PM4 128 - -/* EG_CB_CNTL */ -#define EG_CB_CNTL__CB_TARGET_MASK 0 -#define EG_CB_CNTL__CB_SHADER_MASK 1 -#define EG_CB_CNTL__CB_COLOR_CONTROL 2 -#define EG_CB_CNTL__PA_SC_AA_CONFIG 3 -#define EG_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX 4 -#define EG_CB_CNTL__PA_SC_AA_MASK 5 -#define EG_CB_CNTL_SIZE 6 -#define EG_CB_CNTL_PM4 128 - -/* EG_RASTERIZER */ -#define EG_RASTERIZER__SPI_INTERP_CONTROL_0 0 -#define EG_RASTERIZER__PA_CL_CLIP_CNTL 1 -#define EG_RASTERIZER__PA_SU_SC_MODE_CNTL 2 -#define EG_RASTERIZER__PA_CL_VS_OUT_CNTL 3 -#define EG_RASTERIZER__PA_CL_NANINF_CNTL 4 -#define EG_RASTERIZER__PA_SU_POINT_SIZE 5 -#define EG_RASTERIZER__PA_SU_POINT_MINMAX 6 -#define EG_RASTERIZER__PA_SU_LINE_CNTL 7 -#define EG_RASTERIZER__PA_SC_MPASS_PS_CNTL 8 -#define EG_RASTERIZER__PA_SC_LINE_CNTL 9 -#define EG_RASTERIZER__PA_SU_VTX_CNTL 10 -#define EG_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ 11 -#define EG_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ 12 -#define EG_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ 13 -#define EG_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ 14 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL 15 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP 16 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE 17 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET 18 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE 19 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET 20 -#define EG_RASTERIZER_SIZE 21 
-#define EG_RASTERIZER_PM4 128 - -/* EG_VIEWPORT */ -#define EG_VIEWPORT__PA_SC_VPORT_ZMIN_0 0 -#define EG_VIEWPORT__PA_SC_VPORT_ZMAX_0 1 -#define EG_VIEWPORT__PA_CL_VPORT_XSCALE_0 2 -#define EG_VIEWPORT__PA_CL_VPORT_YSCALE_0 3 -#define EG_VIEWPORT__PA_CL_VPORT_ZSCALE_0 4 -#define EG_VIEWPORT__PA_CL_VPORT_XOFFSET_0 5 -#define EG_VIEWPORT__PA_CL_VPORT_YOFFSET_0 6 -#define EG_VIEWPORT__PA_CL_VPORT_ZOFFSET_0 7 -#define EG_VIEWPORT__PA_CL_VTE_CNTL 8 -#define EG_VIEWPORT_SIZE 9 -#define EG_VIEWPORT_PM4 128 - -/* EG_SCISSOR */ -#define EG_SCISSOR__PA_SC_SCREEN_SCISSOR_TL 0 -#define EG_SCISSOR__PA_SC_SCREEN_SCISSOR_BR 1 -#define EG_SCISSOR__PA_SC_WINDOW_OFFSET 2 -#define EG_SCISSOR__PA_SC_WINDOW_SCISSOR_TL 3 -#define EG_SCISSOR__PA_SC_WINDOW_SCISSOR_BR 4 -#define EG_SCISSOR__PA_SC_CLIPRECT_RULE 5 -#define EG_SCISSOR__PA_SC_CLIPRECT_0_TL 6 -#define EG_SCISSOR__PA_SC_CLIPRECT_0_BR 7 -#define EG_SCISSOR__PA_SC_CLIPRECT_1_TL 8 -#define EG_SCISSOR__PA_SC_CLIPRECT_1_BR 9 -#define EG_SCISSOR__PA_SC_CLIPRECT_2_TL 10 -#define EG_SCISSOR__PA_SC_CLIPRECT_2_BR 11 -#define EG_SCISSOR__PA_SC_CLIPRECT_3_TL 12 -#define EG_SCISSOR__PA_SC_CLIPRECT_3_BR 13 -#define EG_SCISSOR__PA_SC_EDGERULE 14 -#define EG_SCISSOR__PA_SC_GENERIC_SCISSOR_TL 15 -#define EG_SCISSOR__PA_SC_GENERIC_SCISSOR_BR 16 -#define EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL 17 -#define EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR 18 -#define EG_SCISSOR__PA_SU_HARDWARE_SCREEN_OFFSET 19 -#define EG_SCISSOR_SIZE 20 -#define EG_SCISSOR_PM4 128 - -/* EG_BLEND */ -#define EG_BLEND__CB_BLEND_RED 0 -#define EG_BLEND__CB_BLEND_GREEN 1 -#define EG_BLEND__CB_BLEND_BLUE 2 -#define EG_BLEND__CB_BLEND_ALPHA 3 -#define EG_BLEND__CB_BLEND0_CONTROL 4 -#define EG_BLEND__CB_BLEND1_CONTROL 5 -#define EG_BLEND__CB_BLEND2_CONTROL 6 -#define EG_BLEND__CB_BLEND3_CONTROL 7 -#define EG_BLEND__CB_BLEND4_CONTROL 8 -#define EG_BLEND__CB_BLEND5_CONTROL 9 -#define EG_BLEND__CB_BLEND6_CONTROL 10 -#define EG_BLEND__CB_BLEND7_CONTROL 11 -#define EG_BLEND_SIZE 12 
-#define EG_BLEND_PM4 128 - -/* EG_DSA */ -#define EG_DSA__DB_STENCIL_CLEAR 0 -#define EG_DSA__DB_DEPTH_CLEAR 1 -#define EG_DSA__SX_ALPHA_TEST_CONTROL 2 -#define EG_DSA__DB_STENCILREFMASK 3 -#define EG_DSA__DB_STENCILREFMASK_BF 4 -#define EG_DSA__SX_ALPHA_REF 5 -#define EG_DSA__SPI_FOG_CNTL 6 -#define EG_DSA__DB_DEPTH_CONTROL 7 -#define EG_DSA__DB_SHADER_CONTROL 8 -#define EG_DSA__DB_RENDER_CONTROL 9 -#define EG_DSA__DB_COUNT_CONTROL 10 -#define EG_DSA__DB_RENDER_OVERRIDE 11 -#define EG_DSA__DB_RENDER_OVERRIDE2 12 -#define EG_DSA__DB_SRESULTS_COMPARE_STATE0 13 -#define EG_DSA__DB_SRESULTS_COMPARE_STATE1 14 -#define EG_DSA__DB_PRELOAD_CONTROL 15 -#define EG_DSA__DB_ALPHA_TO_MASK 16 -#define EG_DSA_SIZE 17 -#define EG_DSA_PM4 128 - -/* EG_VS_SHADER */ -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_0 0 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_1 1 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_2 2 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_3 3 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_4 4 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_5 5 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_6 6 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_7 7 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_8 8 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_9 9 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_10 10 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_11 11 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_12 12 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_13 13 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_14 14 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_15 15 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_16 16 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_17 17 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_18 18 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_19 19 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_20 20 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_21 21 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_22 22 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_23 23 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_24 24 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_25 25 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_26 26 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_27 27 
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_28 28 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_29 29 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_30 30 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_31 31 -#define EG_VS_SHADER__SPI_VS_OUT_ID_0 32 -#define EG_VS_SHADER__SPI_VS_OUT_ID_1 33 -#define EG_VS_SHADER__SPI_VS_OUT_ID_2 34 -#define EG_VS_SHADER__SPI_VS_OUT_ID_3 35 -#define EG_VS_SHADER__SPI_VS_OUT_ID_4 36 -#define EG_VS_SHADER__SPI_VS_OUT_ID_5 37 -#define EG_VS_SHADER__SPI_VS_OUT_ID_6 38 -#define EG_VS_SHADER__SPI_VS_OUT_ID_7 39 -#define EG_VS_SHADER__SPI_VS_OUT_ID_8 40 -#define EG_VS_SHADER__SPI_VS_OUT_ID_9 41 -#define EG_VS_SHADER__SPI_VS_OUT_CONFIG 42 -#define EG_VS_SHADER__SQ_PGM_START_VS 43 -#define EG_VS_SHADER__SQ_PGM_RESOURCES_VS 44 -#define EG_VS_SHADER__SQ_PGM_RESOURCES_2_VS 45 -#define EG_VS_SHADER__SQ_PGM_START_FS 46 -#define EG_VS_SHADER__SQ_PGM_RESOURCES_FS 47 -#define EG_VS_SHADER_SIZE 48 -#define EG_VS_SHADER_PM4 128 - -/* EG_PS_SHADER */ -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_0 0 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_1 1 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_2 2 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_3 3 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_4 4 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_5 5 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_6 6 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_7 7 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_8 8 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_9 9 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_10 10 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_11 11 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_12 12 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_13 13 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_14 14 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_15 15 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_16 16 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_17 17 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_18 18 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_19 19 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_20 20 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_21 21 -#define 
EG_PS_SHADER__SPI_PS_INPUT_CNTL_22 22 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_23 23 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_24 24 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_25 25 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_26 26 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_27 27 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_28 28 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_29 29 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_30 30 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_31 31 -#define EG_PS_SHADER__SPI_THREAD_GROUPING 32 -#define EG_PS_SHADER__SPI_PS_IN_CONTROL_0 33 -#define EG_PS_SHADER__SPI_PS_IN_CONTROL_1 34 -#define EG_PS_SHADER__SPI_INPUT_Z 35 -#define EG_PS_SHADER__SPI_BARYC_CNTL 36 -#define EG_PS_SHADER__SPI_PS_IN_CONTROL_2 37 -#define EG_PS_SHADER__SPI_COMPUTE_INPUT_CNTL 38 -#define EG_PS_SHADER__SQ_PGM_START_PS 39 -#define EG_PS_SHADER__SQ_PGM_RESOURCES_PS 40 -#define EG_PS_SHADER__SQ_PGM_RESOURCES_2_PS 41 -#define EG_PS_SHADER__SQ_PGM_EXPORTS_PS 42 -#define EG_PS_SHADER_SIZE 43 -#define EG_PS_SHADER_PM4 128 - -/* EG_UCP */ -#define EG_UCP__PA_CL_UCP0_X 0 -#define EG_UCP__PA_CL_UCP0_Y 1 -#define EG_UCP__PA_CL_UCP0_Z 2 -#define EG_UCP__PA_CL_UCP0_W 3 -#define EG_UCP__PA_CL_UCP1_X 4 -#define EG_UCP__PA_CL_UCP1_Y 5 -#define EG_UCP__PA_CL_UCP1_Z 6 -#define EG_UCP__PA_CL_UCP1_W 7 -#define EG_UCP__PA_CL_UCP2_X 8 -#define EG_UCP__PA_CL_UCP2_Y 9 -#define EG_UCP__PA_CL_UCP2_Z 10 -#define EG_UCP__PA_CL_UCP2_W 11 -#define EG_UCP__PA_CL_UCP3_X 12 -#define EG_UCP__PA_CL_UCP3_Y 13 -#define EG_UCP__PA_CL_UCP3_Z 14 -#define EG_UCP__PA_CL_UCP3_W 15 -#define EG_UCP__PA_CL_UCP4_X 16 -#define EG_UCP__PA_CL_UCP4_Y 17 -#define EG_UCP__PA_CL_UCP4_Z 18 -#define EG_UCP__PA_CL_UCP4_W 19 -#define EG_UCP__PA_CL_UCP5_X 20 -#define EG_UCP__PA_CL_UCP5_Y 21 -#define EG_UCP__PA_CL_UCP5_Z 22 -#define EG_UCP__PA_CL_UCP5_W 23 -#define EG_UCP_SIZE 24 -#define EG_UCP_PM4 128 - -/* EG_VS_CBUF */ -#define EG_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0 0 -#define EG_VS_CBUF__ALU_CONST_CACHE_VS_0 1 -#define EG_VS_CBUF_SIZE 2 
-#define EG_VS_CBUF_PM4 128 - -/* EG_PS_CBUF */ -#define EG_PS_CBUF__ALU_CONST_BUFFER_SIZE_PS_0 0 -#define EG_PS_CBUF__ALU_CONST_CACHE_PS_0 1 -#define EG_PS_CBUF_SIZE 2 -#define EG_PS_CBUF_PM4 128 - -/* EG_PS_RESOURCE */ -#define EG_PS_RESOURCE__RESOURCE0_WORD0 0 -#define EG_PS_RESOURCE__RESOURCE0_WORD1 1 -#define EG_PS_RESOURCE__RESOURCE0_WORD2 2 -#define EG_PS_RESOURCE__RESOURCE0_WORD3 3 -#define EG_PS_RESOURCE__RESOURCE0_WORD4 4 -#define EG_PS_RESOURCE__RESOURCE0_WORD5 5 -#define EG_PS_RESOURCE__RESOURCE0_WORD6 6 -#define EG_PS_RESOURCE__RESOURCE0_WORD7 7 -#define EG_PS_RESOURCE_SIZE 8 -#define EG_PS_RESOURCE_PM4 128 - -/* EG_VS_RESOURCE */ -#define EG_VS_RESOURCE__RESOURCE160_WORD0 0 -#define EG_VS_RESOURCE__RESOURCE160_WORD1 1 -#define EG_VS_RESOURCE__RESOURCE160_WORD2 2 -#define EG_VS_RESOURCE__RESOURCE160_WORD3 3 -#define EG_VS_RESOURCE__RESOURCE160_WORD4 4 -#define EG_VS_RESOURCE__RESOURCE160_WORD5 5 -#define EG_VS_RESOURCE__RESOURCE160_WORD6 6 -#define EG_VS_RESOURCE__RESOURCE160_WORD7 7 -#define EG_VS_RESOURCE_SIZE 8 -#define EG_VS_RESOURCE_PM4 128 - -/* EG_FS_RESOURCE */ -#define EG_FS_RESOURCE__RESOURCE320_WORD0 0 -#define EG_FS_RESOURCE__RESOURCE320_WORD1 1 -#define EG_FS_RESOURCE__RESOURCE320_WORD2 2 -#define EG_FS_RESOURCE__RESOURCE320_WORD3 3 -#define EG_FS_RESOURCE__RESOURCE320_WORD4 4 -#define EG_FS_RESOURCE__RESOURCE320_WORD5 5 -#define EG_FS_RESOURCE__RESOURCE320_WORD6 6 -#define EG_FS_RESOURCE__RESOURCE320_WORD7 7 -#define EG_FS_RESOURCE_SIZE 8 -#define EG_FS_RESOURCE_PM4 128 - -/* EG_GS_RESOURCE */ -#define EG_GS_RESOURCE__RESOURCE336_WORD0 0 -#define EG_GS_RESOURCE__RESOURCE336_WORD1 1 -#define EG_GS_RESOURCE__RESOURCE336_WORD2 2 -#define EG_GS_RESOURCE__RESOURCE336_WORD3 3 -#define EG_GS_RESOURCE__RESOURCE336_WORD4 4 -#define EG_GS_RESOURCE__RESOURCE336_WORD5 5 -#define EG_GS_RESOURCE__RESOURCE336_WORD6 6 -#define EG_GS_RESOURCE__RESOURCE336_WORD7 7 -#define EG_GS_RESOURCE_SIZE 8 -#define EG_GS_RESOURCE_PM4 128 - -/* EG_PS_SAMPLER */ 
-#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0 0 -#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0 1 -#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0 2 -#define EG_PS_SAMPLER_SIZE 3 -#define EG_PS_SAMPLER_PM4 128 - -/* EG_VS_SAMPLER */ -#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD0_18 0 -#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD1_18 1 -#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD2_18 2 -#define EG_VS_SAMPLER_SIZE 3 -#define EG_VS_SAMPLER_PM4 128 - -/* EG_GS_SAMPLER */ -#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD0_36 0 -#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD1_36 1 -#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD2_36 2 -#define EG_GS_SAMPLER_SIZE 3 -#define EG_GS_SAMPLER_PM4 128 - -/* EG_PS_SAMPLER_BORDER */ -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX 0 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 1 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 2 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 3 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 4 -#define EG_PS_SAMPLER_BORDER_SIZE 5 -#define EG_PS_SAMPLER_BORDER_PM4 128 - -/* EG_VS_SAMPLER_BORDER */ -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_INDEX 0 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 1 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 2 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 3 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 4 -#define EG_VS_SAMPLER_BORDER_SIZE 5 -#define EG_VS_SAMPLER_BORDER_PM4 128 - -/* EG_GS_SAMPLER_BORDER */ -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_INDEX 0 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 1 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 2 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 3 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 4 -#define EG_GS_SAMPLER_BORDER_SIZE 5 -#define EG_GS_SAMPLER_BORDER_PM4 128 - -/* EG_CB */ -#define EG_CB__CB_COLOR0_BASE 0 -#define EG_CB__CB_COLOR0_PITCH 1 -#define 
EG_CB__CB_COLOR0_SLICE 2 -#define EG_CB__CB_COLOR0_VIEW 3 -#define EG_CB__CB_COLOR0_INFO 4 -#define EG_CB__CB_COLOR0_ATTRIB 5 -#define EG_CB__CB_COLOR0_DIM 6 -#define EG_CB_SIZE 7 -#define EG_CB_PM4 128 - -/* EG_DB */ -#define EG_DB__DB_HTILE_DATA_BASE 0 -#define EG_DB__DB_Z_INFO 1 -#define EG_DB__DB_STENCIL_INFO 2 -#define EG_DB__DB_DEPTH_SIZE 3 -#define EG_DB__DB_DEPTH_SLICE 4 -#define EG_DB__DB_DEPTH_VIEW 5 -#define EG_DB__DB_HTILE_SURFACE 6 -#define EG_DB__DB_Z_READ_BASE 7 -#define EG_DB__DB_STENCIL_READ_BASE 8 -#define EG_DB__DB_Z_WRITE_BASE 9 -#define EG_DB__DB_STENCIL_WRITE_BASE 10 -#define EG_DB_SIZE 11 -#define EG_DB_PM4 128 - -/* EG_VGT */ -#define EG_VGT__VGT_PRIMITIVE_TYPE 0 -#define EG_VGT__VGT_MAX_VTX_INDX 1 -#define EG_VGT__VGT_MIN_VTX_INDX 2 -#define EG_VGT__VGT_INDX_OFFSET 3 -#define EG_VGT__VGT_DMA_INDEX_TYPE 4 -#define EG_VGT__VGT_PRIMITIVEID_EN 5 -#define EG_VGT__VGT_DMA_NUM_INSTANCES 6 -#define EG_VGT__VGT_MULTI_PRIM_IB_RESET_EN 7 -#define EG_VGT__VGT_INSTANCE_STEP_RATE_0 8 -#define EG_VGT__VGT_INSTANCE_STEP_RATE_1 9 -#define EG_VGT_SIZE 10 -#define EG_VGT_PM4 128 - -/* EG_DRAW */ -#define EG_DRAW__VGT_NUM_INDICES 0 -#define EG_DRAW__VGT_DMA_BASE_HI 1 -#define EG_DRAW__VGT_DMA_BASE 2 -#define EG_DRAW__VGT_DRAW_INITIATOR 3 -#define EG_DRAW_SIZE 4 -#define EG_DRAW_PM4 128 - -/* EG_VGT_EVENT */ -#define EG_VGT_EVENT__VGT_EVENT_INITIATOR 0 -#define EG_VGT_EVENT_SIZE 1 -#define EG_VGT_EVENT_PM4 128 - -/* EG_CB_FLUSH */ -#define EG_CB_FLUSH_SIZE 0 -#define EG_CB_FLUSH_PM4 128 - -/* EG_DB_FLUSH */ -#define EG_DB_FLUSH_SIZE 0 -#define EG_DB_FLUSH_PM4 128 - diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 9e1a5e1f98..77432661b6 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -103,7 +103,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } blend->cb_target_mask = target_mask; r600_pipe_state_add_reg(rstate, 
R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFF, NULL); + color_control, 0xFFFFFFFD, NULL); r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); for (int i = 0; i < 8; i++) { @@ -150,10 +150,6 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_DSA; /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ - /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be - * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will - * be set if shader use texkill instruction - */ db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); stencil_ref_mask = 0; stencil_ref_mask_bf = 0; @@ -210,7 +206,10 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, + * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by + * evergreen_pipe_shader_ps().*/ + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL); @@ -305,11 +304,16 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, { struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); union util_color uc; + uint32_t coord_trunc = 0; if (rstate == NULL) { return NULL; 
} + if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) || + (state->min_img_filter == PIPE_TEX_FILTER_NEAREST)) + coord_trunc = 1; + rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -328,6 +332,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | + S_03C008_MC_COORD_TRUNCATE(coord_trunc) | S_03C008_TYPE(1), 0xFFFFFFFF, NULL); @@ -351,7 +356,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte struct r600_resource *rbuffer; unsigned format; uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4]; + unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; if (resource == NULL) @@ -370,7 +375,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(state->format, + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { @@ -380,36 +385,43 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte if (desc == NULL) { R600_ERR("unknow format %d\n", state->format); } - tmp = (struct r600_resource_texture*)texture; + tmp = (struct r600_resource_texture *)texture; + if (tmp->depth && !tmp->is_flushing_texture) { + r600_texture_depth_flush(ctx, texture, TRUE); + tmp = tmp->flushed_depth_texture; + } + + if (tmp->force_int_type) { + word4 &= C_030010_NUM_FORMAT_ALL; + word4 |= S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_INT); + } + rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ - if (tmp->depth) { - 
r600_texture_depth_flush(ctx, texture); - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->flushed_depth_texture->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - } - pitch = align(tmp->pitch_in_pixels[0], 8); + + pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8); + array_mode = tmp->array_mode[0]; + tile_type = tmp->tile_type; /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, S_030000_DIM(r600_tex_dim(texture->target)) | S_030000_PITCH((pitch / 8) - 1) | + S_030000_NON_DISP_TILING_ORDER(tile_type) | S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, S_030004_TEX_HEIGHT(texture->height0 - 1) | - S_030004_TEX_DEPTH(texture->depth0 - 1), + S_030004_TEX_DEPTH(texture->depth0 - 1) | + S_030004_ARRAY_MODE(array_mode), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | - S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | + word4 | + S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_NO_ZERO) | S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, S_030014_LAST_LEVEL(state->u.tex.last_level) | @@ -431,7 +443,8 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou for (int i = 0; i < count; i++) { if (resource[i]) { - evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); } } } @@ -446,9 +459,11 @@ static void 
evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { if (resource[i]) - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); else - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], @@ -457,7 +472,8 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou } for (i = count; i < NUM_TEX_UNITS; i++) { if (rctx->ps_samplers.views[i]) { - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } @@ -638,11 +654,19 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state unsigned color_info; unsigned format, swap, ntype; unsigned offset; + unsigned tile_type; const struct util_format_description *desc; struct r600_bo *bo[3]; + int i; surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + + if (rtex->depth && !rtex->is_flushing_texture) { + r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); + rtex = rtex->flushed_depth_texture; + } + rbuffer = &rtex->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; @@ -651,21 +675,43 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, level, state->cbufs[cb]->u.tex.first_layer); - pitch = 
rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); + desc = util_format_description(surf->base.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_028C70_NUMBER_SRGB; - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); + format = r600_translate_colorformat(surf->base.format); + swap = r600_translate_colorswap(surf->base.format); + + /* disable when gallium grows int textures */ + if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) + ntype = 4; + color_info = S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | + S_028C70_ARRAY_MODE(rtex->array_mode[level]) | S_028C70_BLEND_CLAMP(1) | S_028C70_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - color_info |= S_028C70_SOURCE_FORMAT(1); + + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + /* we can only set the export size if any thing is snorm/unorm component is > 11 bits, + if we aren't a float, sint or uint */ + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && + desc->channel[i].size < 12 && desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT && + ntype != 4 && ntype != 5) + color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC); + + if (rtex->array_mode[level] > V_028C70_ARRAY_LINEAR_ALIGNED) { + tile_type = rtex->tile_type; + } else /* workaround for linear buffers */ + tile_type = 1; /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, @@ -690,7 +736,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state 0x00000000, 0xFFFFFFFF, NULL); 
r600_pipe_state_add_reg(rstate, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, - S_028C74_NON_DISP_TILING_ORDER(1), + S_028C74_NON_DISP_TILING_ORDER(tile_type), 0xFFFFFFFF, bo[0]); } @@ -711,17 +757,14 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode[level] = 2; - rtex->tile_type = 1; - rtex->depth = 1; + rbuffer = &rtex->resource; /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, level, state->zsbuf->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); @@ -770,8 +813,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&rctx->framebuffer, state); - rctx->pframebuffer = &rctx->framebuffer; - /* build states */ for (int i = 0; i < state->nr_cbufs; i++) { evergreen_cb(rctx, rstate, state, i); @@ -839,48 +880,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } } -static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - /* Note that the state tracker can unbind constant buffers by - * passing NULL here. 
- */ - if (buffer == NULL) { - return; - } - - switch (shader) { - case PIPE_SHADER_VERTEX: - rctx->vs_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028180_ALU_CONST_BUFFER_SIZE_VS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); - break; - case PIPE_SHADER_FRAGMENT: - rctx->ps_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028140_ALU_CONST_BUFFER_SIZE_PS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } -} - void evergreen_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = evergreen_create_blend_state; @@ -908,7 +907,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.delete_vs_state = r600_delete_vs_shader; rctx->context.set_blend_color = evergreen_set_blend_color; rctx->context.set_clip_state = evergreen_set_clip_state; - rctx->context.set_constant_buffer = evergreen_set_constant_buffer; + rctx->context.set_constant_buffer = r600_set_constant_buffer; rctx->context.set_fragment_sampler_views = evergreen_set_ps_sampler_view; rctx->context.set_framebuffer_state = evergreen_set_framebuffer_state; rctx->context.set_polygon_stipple = evergreen_set_polygon_stipple; @@ -920,6 +919,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view; rctx->context.set_viewport_state = evergreen_set_viewport_state; 
rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; } void evergreen_init_config(struct r600_pipe_context *rctx) @@ -1069,12 +1069,76 @@ void evergreen_init_config(struct r600_pipe_context *rctx) num_hs_stack_entries = 42; num_ls_stack_entries = 42; break; + case CHIP_BARTS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_TURKS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_CAICOS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 10; + num_gs_threads = 10; + num_es_threads = 10; + num_hs_threads = 10; + num_ls_threads = 10; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; } tmp = 0x00000000; switch (family) { case CHIP_CEDAR: case CHIP_PALM: + case CHIP_CAICOS: break; default: tmp |= S_008C00_VC_ENABLE(1); @@ -1260,226 +1324,11 @@ void evergreen_polygon_offset_update(struct 
r600_pipe_context *rctx) } } -static void evergreen_spi_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_shader *shader = rctx->ps_shader; - struct r600_pipe_state rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; - - rstate.nregs = 0; - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rctx->flatshade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - r600_context_pipe_state_set(&rctx->ctx, &rstate); -} - -void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_state *rstate; - struct r600_resource *rbuffer; - struct pipe_vertex_buffer *vertex_buffer; - unsigned i, offset; - - /* we don't update until we know vertex elements */ - if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) - return; - - /* delete previous translated vertex elements */ - if (rctx->tran.new_velems) { - r600_end_vertex_translate(rctx); - } - - if (rctx->vertex_elements->incompatible_layout) { - /* translate rebind new vertex elements so - * return once translated - */ - r600_begin_vertex_translate(rctx); - return; - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - rctx->nvs_resource = rctx->vertex_elements->count; - } else { - /* bind vertex buffer once */ - rctx->nvs_resource = rctx->nvertex_buffer; - } - - for (i = 0 ; i < rctx->nvs_resource; i++) { - rstate = &rctx->vs_resource[i]; - 
rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - unsigned vbuffer_index; - vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->vbuffer_offset[i] + - vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - } else { - /* bind vertex buffer once */ - vertex_buffer = &rctx->vertex_buffer[i]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - } - - r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, - rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, - S_030008_STRIDE(vertex_buffer->stride), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, - S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, - 0xC0000000, 0xFFFFFFFF, NULL); - evergreen_fs_resource_set(&rctx->ctx, rstate, i); - } -} - -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer; - u32 vgt_dma_index_type, 
vgt_draw_initiator, mask; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - struct r600_drawl draw; - unsigned prim; - - memset(&draw, 0, sizeof(struct r600_drawl)); - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->index_bias; - - r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, - &rctx->index_buffer.index_size, - &draw.start, - info->count); - - draw.index_size = rctx->index_buffer.index_size; - pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - draw.index_buffer_offset = draw.start * draw.index_size; - draw.start = 0; - r600_upload_index_buffer(rctx, &draw); - } else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->start; - } - - switch (draw.index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw.index_size); - return; - } - if (r600_conv_pipe_prim(draw.mode, &prim)) - return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } - - evergreen_spi_update(rctx); - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= 
(0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw.count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw.index_buffer) { - rbuffer = (struct r600_resource*)draw.index_buffer; - rdraw.indices = rbuffer->bo; - rdraw.indices_bo_offset = draw.index_buffer_offset; - } - evergreen_context_draw(&rctx->ctx, &rdraw); - - pipe_resource_reference(&draw.index_buffer, NULL); -} - void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; int pos_index = -1, face_index = -1; int ninterp = 0; boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; @@ -1487,6 +1336,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader rstate->nregs = 0; + db_shader_control = 0; for (i = 0; i < rshader->ninput; i++) { /* evergreen NUM_INTERP only contains values interpolated into 
the LDS, POSITION goes via GPRs from the SC so isn't counted */ @@ -1508,16 +1358,12 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader } for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_Z_EXPORT_ENABLE(1), - S_02880C_Z_EXPORT_ENABLE(1), NULL); + db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_STENCIL_EXPORT_ENABLE(1), - S_02880C_STENCIL_EXPORT_ENABLE(1), NULL); + db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(1); } + if (rshader->uses_kill) + db_shader_control |= S_02880C_KILL_ENABLE(1); exports_ps = 0; num_cout = 0; @@ -1592,15 +1438,15 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps, 0xFFFFFFFF, NULL); - - if (rshader->uses_kill) { - /* only set some bits here, the other bits are set in the dsa state */ - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_KILL_ENABLE(1), - S_02880C_KILL_ENABLE(1), NULL); - } - + /* FIXME: Evergreen doesn't seem to support MULTIWRITE_ENABLE. 
*/ + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + db_shader_control, + S_02880C_Z_EXPORT_ENABLE(1) | + S_02880C_STENCIL_EXPORT_ENABLE(1) | + S_02880C_KILL_ENABLE(1), + NULL); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF, 0xFFFFFFFF, NULL); @@ -1651,6 +1497,18 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader 0xFFFFFFFF, NULL); } +void evergreen_fetch_shader(struct r600_vertex_element *ve) +{ + struct r600_pipe_state *rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, + (r600_bo_offset(ve->fetch_shader)) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} + void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) { struct pipe_depth_stencil_alpha_state dsa; @@ -1673,3 +1531,31 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028000_COPY_CENTROID(1), NULL); return rstate; } + +void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) +{ + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, + S_030008_STRIDE(stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, + S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | + S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | + S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | + S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + 
r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, + 0xC0000000, 0xFFFFFFFF, NULL); +} diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index e67254b256..c51a163bd0 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -108,8 +108,9 @@ #define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) #define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate)) /* Registers */ #define R_008C00_SQ_CONFIG 0x00008C00 @@ -327,6 +328,9 @@ #define S_028C70_SOURCE_FORMAT(x) (((x) & 0x3) << 24) #define G_028C70_SOURCE_FORMAT(x) (((x) >> 24) & 0x3) #define C_028C70_SOURCE_FORMAT 0xFCFFFFFF +#define V_028C70_EXPORT_4C_32BPC 0x0 +#define V_028C70_EXPORT_4C_16BPC 0x1 +#define V_028C70_EXPORT_2C_32BPC 0x2 /* Do not use */ #define S_028C70_RAT(x) (((x) & 0x1) << 26) #define G_028C70_RAT(x) (((x) >> 26) & 0x1) #define C_028C70_RAT 0xFBFFFFFF @@ -427,15 +431,6 @@ #define C_028800_STENCILZFAIL_BF 0x1FFFFFFF #define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD -#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define 
C_028808_DITHER_ENABLE 0xFFFFFFFB #define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) #define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) #define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 @@ -939,6 +934,9 @@ #define V_030000_SQ_TEX_DIM_2D_ARRAY 0x00000005 #define V_030000_SQ_TEX_DIM_2D_MSAA 0x00000006 #define V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA 0x00000007 +#define S_030000_NON_DISP_TILING_ORDER(x) (((x) & 0x1) << 5) +#define G_030000_NON_DISP_TILING_ORDER(x) (((x) >> 5) & 0x1) +#define C_030000_NON_DISP_TILING_ORDER 0xFFFFFFDF #define S_030000_PITCH(x) (((x) & 0xFFF) << 6) #define G_030000_PITCH(x) (((x) >> 6) & 0xFFF) #define C_030000_PITCH 0xFFFC003F @@ -988,8 +986,8 @@ #define S_030010_SRF_MODE_ALL(x) (((x) & 0x1) << 10) #define G_030010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1) #define C_030010_SRF_MODE_ALL 0xFFFFFBFF -#define V_030010_SFR_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000 -#define V_030010_SFR_MODE_NO_ZERO 0x00000001 +#define V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000 +#define V_030010_SRF_MODE_NO_ZERO 0x00000001 #define S_030010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11) #define G_030010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1) #define C_030010_FORCE_DEGAMMA 0xFFFFF7FF diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index aa456d493f..0b7d6f7096 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -35,7 +35,7 @@ #define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) #define R600_ERR(fmt, args...) 
\ - fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) + fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args) typedef uint64_t u64; typedef uint32_t u32; @@ -92,6 +92,9 @@ enum radeon_family { CHIP_CYPRESS, CHIP_HEMLOCK, CHIP_PALM, + CHIP_BARTS, + CHIP_TURKS, + CHIP_CAICOS, CHIP_LAST, }; @@ -110,14 +113,17 @@ struct r600_tiling_info { enum radeon_family r600_get_family(struct radeon *rw); enum chip_class r600_get_family_class(struct radeon *radeon); struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); +unsigned r600_get_clock_crystal_freq(struct radeon *radeon); +unsigned r600_get_minor_version(struct radeon *radeon); +unsigned r600_get_num_backends(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; struct r600_bo *r600_bo(struct radeon *radeon, - unsigned size, unsigned alignment, - unsigned binding, unsigned usage); + unsigned size, unsigned alignment, + unsigned binding, unsigned usage); struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode); + unsigned handle, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, @@ -245,10 +251,9 @@ struct r600_context { u32 *pm4; struct list_head query_list; unsigned num_query_running; - unsigned fence; struct list_head fenced_bo; - unsigned *cfence; - struct r600_bo *fence_bo; + unsigned max_db; /* for OQ */ + boolean predicate_drawing; }; struct r600_draw { @@ -281,13 +286,11 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query); void r600_query_end(struct r600_context *ctx, struct r600_query *query); void r600_context_queries_suspend(struct r600_context *ctx); void r600_context_queries_resume(struct r600_context *ctx); +void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int 
operation, + int flag_wait); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); -void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); - void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 1f41269534..240093f9b9 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -32,52 +32,118 @@ #include "r600_formats.h" #include "r600d.h" -static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) +#define NUM_OF_CYCLES 3 +#define NUM_OF_COMPONENTS 4 + +static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu) { if(alu->is_op3) return 3; - switch (alu->inst) { - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: - return 0; - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: - case 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: - return 2; - - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: - return 1; - default: R600_ERR( - "Need instruction operand number for 0x%x.\n", alu->inst); - }; + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + switch (alu->inst) { + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: + return 0; + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: + case 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: + return 2; + + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + return 1; + default: R600_ERR( + "Need instruction operand number for 0x%x.\n", alu->inst); + } + break; + case CHIPREV_EVERGREEN: + switch (alu->inst) { + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: + return 0; + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: + 
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW: + return 2; + + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + return 1; + default: R600_ERR( + "Need instruction operand number for 0x%x.\n", alu->inst); + } + break; + } return 3; } @@ -104,7 +170,6 @@ static struct r600_bc_alu *r600_bc_alu(void) if (alu == NULL) return NULL; LIST_INITHEAD(&alu->list); - LIST_INITHEAD(&alu->bs_list); return alu; } @@ -155,6 +220,9 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: bc->chiprev = CHIPREV_EVERGREEN; break; default: @@ -184,6 +252,37 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) { int r; + if 
(bc->cf_last && (bc->cf_last->inst == output->inst || + (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) && + output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) && + output->type == bc->cf_last->output.type && + output->elem_size == bc->cf_last->output.elem_size && + output->swizzle_x == bc->cf_last->output.swizzle_x && + output->swizzle_y == bc->cf_last->output.swizzle_y && + output->swizzle_z == bc->cf_last->output.swizzle_z && + output->swizzle_w == bc->cf_last->output.swizzle_w && + (output->burst_count + bc->cf_last->output.burst_count) <= 16) { + + if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && + (output->array_base + output->burst_count) == bc->cf_last->output.array_base) { + + bc->cf_last->output.end_of_program |= output->end_of_program; + bc->cf_last->output.inst = output->inst; + bc->cf_last->output.gpr = output->gpr; + bc->cf_last->output.array_base = output->array_base; + bc->cf_last->output.burst_count += output->burst_count; + return 0; + + } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) && + output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) { + + bc->cf_last->output.end_of_program |= output->end_of_program; + bc->cf_last->output.inst = output->inst; + bc->cf_last->output.burst_count += output->burst_count; + return 0; + } + } + r = r600_bc_add_cf(bc); if (r) return r; @@ -192,221 +291,849 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) return 0; } -const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000 - SQ_ALU_VEC_120, //001 - SQ_ALU_VEC_102, //010 +/* alu instructions that can ony exits once per group */ +static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && ( + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || + alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || + 
alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && ( + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT || + alu->inst == 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); + } +} + +static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && ( + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && ( + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); + } +} - SQ_ALU_VEC_201, //011 - SQ_ALU_VEC_012, //100 - SQ_ALU_VEC_021, //101 +static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; + } +} - SQ_ALU_VEC_012, //110 - SQ_ALU_VEC_012}; //111 +static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && ( + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && ( + 
alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); + } +} -const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000 - SQ_ALU_SCL_122, //001 - SQ_ALU_SCL_122, //010 +/* alu instructions that can only execute on the vector unit */ +static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + return is_alu_reduction_inst(bc, alu) || + is_alu_mova_inst(bc, alu) || + (bc->chiprev == CHIPREV_EVERGREEN && + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR); +} - SQ_ALU_SCL_221, //011 - SQ_ALU_SCL_212, //100 - SQ_ALU_SCL_122, //101 +/* alu instructions that can only execute on the trans unit */ +static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + if (!alu->is_op3) + return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE || + alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE; + else + return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT || + alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 || + alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 || + alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4; + case CHIPREV_EVERGREEN: + default: + if (!alu->is_op3) + /* Note that FLT_TO_INT_* instructions are vector-only instructions + * on Evergreen, despite what the documentation says. FLT_TO_INT + * can do both vector and scalar. */ + return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED || + alu->inst == 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE; + else + return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; + } +} - SQ_ALU_SCL_122, //110 - SQ_ALU_SCL_122}; //111 +/* alu instructions that can execute on any unit */ +static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + return !is_alu_vec_unit_inst(bc, alu) && + !is_alu_trans_unit_inst(bc, alu); +} -static int init_gpr(struct r600_bc_alu *alu) +static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, + struct r600_bc_alu *assignment[5]) { - int cycle, component; + struct r600_bc_alu *alu; + unsigned i, chan, trans; + + for (i = 0; i < 5; i++) + assignment[i] = NULL; + + for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) { + chan = alu->dst.chan; + if (is_alu_trans_unit_inst(bc, alu)) + trans = 1; + else if (is_alu_vec_unit_inst(bc, alu)) + trans = 0; + else if (assignment[chan]) + trans = 1; // assume ALU_INST_PREFER_VECTOR + else + trans = 0; + + if (trans) { + if (assignment[4]) { + assert(0); //ALU.Trans has already been allocated + return -1; + } + assignment[4] = alu; + } else { + if (assignment[chan]) { + assert(0); //ALU.chan has already been allocated + return -1; + } + assignment[chan] = alu; + } + + if (alu->last) + break; + } + return 0; +} + +struct alu_bank_swizzle { + int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + int hw_cfile_addr[4]; + int hw_cfile_elem[4]; +}; + +static const unsigned cycle_for_bank_swizzle_vec[][3] = { + [SQ_ALU_VEC_012] = { 0, 1, 2 }, + [SQ_ALU_VEC_021] = { 0, 2, 1 }, + [SQ_ALU_VEC_120] = { 1, 2, 0 }, + [SQ_ALU_VEC_102] = { 1, 0, 2 }, + [SQ_ALU_VEC_201] = { 2, 0, 1 }, + [SQ_ALU_VEC_210] = { 2, 1, 0 } +}; + +static const unsigned cycle_for_bank_swizzle_scl[][3] = { + [SQ_ALU_SCL_210] = { 2, 1, 0 }, + 
[SQ_ALU_SCL_122] = { 1, 2, 2 }, + [SQ_ALU_SCL_212] = { 2, 1, 2 }, + [SQ_ALU_SCL_221] = { 2, 2, 1 } +}; + +static void init_bank_swizzle(struct alu_bank_swizzle *bs) +{ + int i, cycle, component; /* set up gpr use */ for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++) for (component = 0; component < NUM_OF_COMPONENTS; component++) - alu->hw_gpr[cycle][component] = -1; - return 0; + bs->hw_gpr[cycle][component] = -1; + for (i = 0; i < 4; i++) + bs->hw_cfile_addr[i] = -1; + for (i = 0; i < 4; i++) + bs->hw_cfile_elem[i] = -1; } -#if 0 -static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle) +static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle) { - if (alu->hw_gpr[cycle][chan] < 0) - alu->hw_gpr[cycle][chan] = sel; - else if (alu->hw_gpr[cycle][chan] != (int)sel) { - R600_ERR("Another scalar operation has already used GPR read port for channel\n"); + if (bs->hw_gpr[cycle][chan] == -1) + bs->hw_gpr[cycle][chan] = sel; + else if (bs->hw_gpr[cycle][chan] != (int)sel) { + // Another scalar operation has already used GPR read port for channel return -1; } return 0; } -static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle) +static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) { - int table[3]; - int ret = 0; - switch (swiz) { - case SQ_ALU_SCL_210: - table[0] = 2; table[1] = 1; table[2] = 0; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_122: - table[0] = 1; table[1] = 2; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_212: - table[0] = 2; table[1] = 1; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_221: - table[0] = 2; table[1] = 2; table[2] = 1; - *p_cycle = table[sel]; - break; - break; - default: - R600_ERR("bad scalar bank swizzle value\n"); - ret = -1; - break; + int res, num_res = 4; + if (bc->chiprev >= CHIPREV_R700) { + num_res = 2; + chan /= 2; + } + for (res = 0; 
res < num_res; ++res) { + if (bs->hw_cfile_addr[res] == -1) { + bs->hw_cfile_addr[res] = sel; + bs->hw_cfile_elem[res] = chan; + return 0; + } else if (bs->hw_cfile_addr[res] == sel && + bs->hw_cfile_elem[res] == chan) + return 0; // Read for this scalar element already reserved, nothing to do here. } - return ret; + // All cfile read ports are used, cannot reference vector element + return -1; } -static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle) +static int is_gpr(unsigned sel) { - int table[3]; - int ret; - - switch (swiz) { - case SQ_ALU_VEC_012: - table[0] = 0; table[1] = 1; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_021: - table[0] = 0; table[1] = 2; table[2] = 1; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_120: - table[0] = 1; table[1] = 2; table[2] = 0; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_102: - table[0] = 1; table[1] = 0; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_201: - table[0] = 2; table[1] = 0; table[2] = 1; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_210: - table[0] = 2; table[1] = 1; table[2] = 0; - *p_cycle = table[sel]; - break; - default: - R600_ERR("bad vector bank swizzle value\n"); - ret = -1; - break; - } - return ret; + return (sel >= 0 && sel <= 127); } +/* CB constants start at 512, and get translated to a kcache index when ALU + * clauses are constructed. Note that we handle kcache constants the same way + * as (the now gone) cfile constants, is that really required? 
*/ +static int is_cfile(unsigned sel) +{ + return (sel > 255 && sel < 512) || + (sel > 511 && sel < 4607) || // Kcache before translate + (sel > 127 && sel < 192); // Kcache after translate +} +static int is_const(int sel) +{ + return is_cfile(sel) || + (sel >= V_SQ_ALU_SRC_0 && + sel <= V_SQ_ALU_SRC_LITERAL); +} -static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter) +static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu, + struct alu_bank_swizzle *bs, int bank_swizzle) { - int num_src; - int i; - int channel_swizzle; + int r, src, num_src, sel, elem, cycle; + + num_src = r600_bc_get_num_operands(bc, alu); + for (src = 0; src < num_src; src++) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_gpr(sel)) { + cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src]; + if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan) + // Nothing to do; special-case optimization, + // second source uses first source’s reservation + continue; + else { + r = reserve_gpr(bs, sel, elem, cycle); + if (r) + return r; + } + } else if (is_cfile(sel)) { + r = reserve_cfile(bc, bs, sel, elem); + if (r) + return r; + } + // No restrictions on PV, PS, literal or special constants + } + return 0; +} - num_src = r600_bc_get_num_operands(alu); +static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, + struct alu_bank_swizzle *bs, int bank_swizzle) +{ + int r, src, num_src, const_count, sel, elem, cycle; + + num_src = r600_bc_get_num_operands(bc, alu); + for (const_count = 0, src = 0; src < num_src; ++src) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_const(sel)) { // Any constant, including literal and inline constants + if (const_count >= 2) + // More than two references to a constant in + // transcendental operation. 
+ return -1; + else + const_count++; + } + if (is_cfile(sel)) { + r = reserve_cfile(bc, bs, sel, elem); + if (r) + return r; + } + } + for (src = 0; src < num_src; ++src) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_gpr(sel)) { + cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; + if (cycle < const_count) + // Cycle for GPR load conflicts with + // constant load in transcendental operation. + return -1; + r = reserve_gpr(bs, sel, elem, cycle); + if (r) + return r; + } + // Constants already processed + // No restrictions on PV, PS + } + return 0; +} - for (i = 0; i < num_src; i++) { - channel_swizzle = alu->src[i].chan; - if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3) - chan_counter[channel_swizzle]++; +static int check_and_set_bank_swizzle(struct r600_bc *bc, + struct r600_bc_alu *slots[5]) +{ + struct alu_bank_swizzle bs; + int bank_swizzle[5]; + int i, r = 0, forced = 0; + + for (i = 0; i < 5; i++) + if (slots[i] && slots[i]->bank_swizzle_force) { + slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; + forced = 1; + } + + if (forced) + return 0; + + // just check every possible combination of bank swizzle + // not very efficent, but works on the first try in most of the cases + for (i = 0; i < 4; i++) + bank_swizzle[i] = SQ_ALU_VEC_012; + bank_swizzle[4] = SQ_ALU_SCL_210; + while(bank_swizzle[4] <= SQ_ALU_SCL_221) { + init_bank_swizzle(&bs); + for (i = 0; i < 4; i++) { + if (slots[i]) { + r = check_vector(bc, slots[i], &bs, bank_swizzle[i]); + if (r) + break; + } + } + if (!r && slots[4]) { + r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]); + } + if (!r) { + for (i = 0; i < 5; i++) { + if (slots[i]) + slots[i]->bank_swizzle = bank_swizzle[i]; + } + return 0; + } + + for (i = 0; i < 5; i++) { + bank_swizzle[i]++; + if (bank_swizzle[i] <= SQ_ALU_VEC_210) + break; + else + bank_swizzle[i] = SQ_ALU_VEC_012; + } } + + // couldn't find a working swizzle + return -1; } -/* we need something like this I 
think - but this is bogus */ -int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first) +static int replace_gpr_with_pv_ps(struct r600_bc *bc, + struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev) { - struct r600_bc_alu *alu; - int chan_counter[4] = { 0 }; + struct r600_bc_alu *prev[5]; + int gpr[5], chan[5]; + int i, j, r, src, num_src; - update_chan_counter(alu_first, chan_counter); + r = assign_alu_units(bc, alu_prev, prev); + if (r) + return r; - LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { - update_chan_counter(alu, chan_counter); + for (i = 0; i < 5; ++i) { + if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) { + gpr[i] = prev[i]->dst.sel; + /* cube writes more than PV.X */ + if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i])) + chan[i] = 0; + else + chan[i] = prev[i]->dst.chan; + } else + gpr[i] = -1; } - if (chan_counter[0] > 3 || - chan_counter[1] > 3 || - chan_counter[2] > 3 || - chan_counter[3] > 3) { - R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n", - alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]); - return -1; + for (i = 0; i < 5; ++i) { + struct r600_bc_alu *alu = slots[i]; + if(!alu) + continue; + + num_src = r600_bc_get_num_operands(bc, alu); + for (src = 0; src < num_src; ++src) { + if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) + continue; + + if (alu->src[src].sel == gpr[4] && + alu->src[src].chan == chan[4]) { + alu->src[src].sel = V_SQ_ALU_SRC_PS; + alu->src[src].chan = 0; + continue; + } + + for (j = 0; j < 4; ++j) { + if (alu->src[src].sel == gpr[j] && + alu->src[src].chan == j) { + alu->src[src].sel = V_SQ_ALU_SRC_PV; + alu->src[src].chan = chan[j]; + break; + } + } + } } + return 0; } -#endif -static int is_const(int sel) +void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) { - if (sel > 255 && sel < 512) - return 1; - if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL) - 
return 1; - return 0; + switch(value) { + case 0: + *sel = V_SQ_ALU_SRC_0; + break; + case 1: + *sel = V_SQ_ALU_SRC_1_INT; + break; + case -1: + *sel = V_SQ_ALU_SRC_M_1_INT; + break; + case 0x3F800000: // 1.0f + *sel = V_SQ_ALU_SRC_1; + break; + case 0x3F000000: // 0.5f + *sel = V_SQ_ALU_SRC_0_5; + break; + case 0xBF800000: // -1.0f + *sel = V_SQ_ALU_SRC_1; + *neg ^= 1; + break; + case 0xBF000000: // -0.5f + *sel = V_SQ_ALU_SRC_0_5; + *neg ^= 1; + break; + default: + *sel = V_SQ_ALU_SRC_LITERAL; + break; + } } -static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu) +/* compute how many literal are needed */ +static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, + uint32_t literal[4], unsigned *nliteral) { - unsigned swizzle_key; - - if (alu->bank_swizzle_force) { - alu->bank_swizzle = alu->bank_swizzle_force; - return 0; + unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned i, j; + + for (i = 0; i < num_src; ++i) { + if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + uint32_t value = alu->src[i].value; + unsigned found = 0; + for (j = 0; j < *nliteral; ++j) { + if (literal[j] == value) { + found = 1; + break; + } + } + if (!found) { + if (*nliteral >= 4) + return -EINVAL; + literal[(*nliteral)++] = value; + } + } } - swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + - (is_const(alu->src[1].sel) ? 2 : 0 ) + - (is_const(alu->src[2].sel) ? 
1 : 0 ); - - alu->bank_swizzle = bank_swizzle_scl[swizzle_key]; return 0; } -static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu) +static void r600_bc_alu_adjust_literals(struct r600_bc *bc, + struct r600_bc_alu *alu, + uint32_t literal[4], unsigned nliteral) +{ + unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned i, j; + + for (i = 0; i < num_src; ++i) { + if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + uint32_t value = alu->src[i].value; + for (j = 0; j < nliteral; ++j) { + if (literal[j] == value) { + alu->src[i].chan = j; + break; + } + } + } + } +} + +static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], + struct r600_bc_alu *alu_prev) { - unsigned swizzle_key; + struct r600_bc_alu *prev[5]; + struct r600_bc_alu *result[5] = { NULL }; + + uint32_t literal[4], prev_literal[4]; + unsigned nliteral = 0, prev_nliteral = 0; - if (alu->bank_swizzle_force) { - alu->bank_swizzle = alu->bank_swizzle_force; + int i, j, r, src, num_src; + int num_once_inst = 0; + int have_mova = 0, have_rel = 0; + + r = assign_alu_units(bc, alu_prev, prev); + if (r) + return r; + + for (i = 0; i < 5; ++i) { + struct r600_bc_alu *alu; + + /* check number of literals */ + if (prev[i]) { + if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral)) + return 0; + if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral)) + return 0; + if (is_alu_mova_inst(bc, prev[i])) { + if (have_rel) + return 0; + have_mova = 1; + } + num_once_inst += is_alu_once_inst(bc, prev[i]); + } + if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral)) + return 0; + + // let's check used slots + if (prev[i] && !slots[i]) { + result[i] = prev[i]; + continue; + } else if (prev[i] && slots[i]) { + if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { + // trans unit is still free try to use it + if (is_alu_any_unit_inst(bc, slots[i])) { + result[i] = prev[i]; + result[4] = slots[i]; + } else if (is_alu_any_unit_inst(bc, 
prev[i])) { + result[i] = slots[i]; + result[4] = prev[i]; + } else + return 0; + } else + return 0; + } else if(!slots[i]) { + continue; + } else + result[i] = slots[i]; + + // let's check source gprs + alu = slots[i]; + num_once_inst += is_alu_once_inst(bc, alu); + + num_src = r600_bc_get_num_operands(bc, alu); + for (src = 0; src < num_src; ++src) { + if (alu->src[src].rel) { + if (have_mova) + return 0; + have_rel = 1; + } + + // constants doesn't matter + if (!is_gpr(alu->src[src].sel)) + continue; + + for (j = 0; j < 5; ++j) { + if (!prev[j] || !prev[j]->dst.write) + continue; + + // if it's relative then we can't determin which gpr is really used + if (prev[j]->dst.chan == alu->src[src].chan && + (prev[j]->dst.sel == alu->src[src].sel || + prev[j]->dst.rel || alu->src[src].rel)) + return 0; + } + } + } + + /* more than one PRED_ or KILL_ ? */ + if (num_once_inst > 1) return 0; + + /* check if the result can still be swizzlet */ + r = check_and_set_bank_swizzle(bc, result); + if (r) + return 0; + + /* looks like everything worked out right, apply the changes */ + + /* undo adding previus literals */ + bc->cf_last->ndw -= align(prev_nliteral, 2); + + /* sort instructions */ + for (i = 0; i < 5; ++i) { + slots[i] = result[i]; + if (result[i]) { + LIST_DEL(&result[i]->list); + result[i]->last = 0; + LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu); + } + } + + /* determine new last instruction */ + LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1; + + /* determine new first instruction */ + for (i = 0; i < 5; ++i) { + if (result[i]) { + bc->cf_last->curr_bs_head = result[i]; + break; + } } - swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + - (is_const(alu->src[1].sel) ? 2 : 0 ) + - (is_const(alu->src[2].sel) ? 
1 : 0 ); - alu->bank_swizzle = bank_swizzle_vec[swizzle_key]; + bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head; + bc->cf_last->prev2_bs_head = NULL; + return 0; } -static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first) +/* This code handles kcache lines as single blocks of 32 constants. We could + * probably do slightly better by recognizing that we actually have two + * consecutive lines of 16 constants, but the resulting code would also be + * somewhat more complicated. */ +static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type) { - struct r600_bc_alu *alu = NULL; - int num_instr = 1; + struct r600_bc_kcache *kcache = bc->cf_last->kcache; + unsigned int required_lines; + unsigned int free_lines = 0; + unsigned int cache_line[3]; + unsigned int count = 0; + unsigned int i, j; + int r; + + /* Collect required cache lines. */ + for (i = 0; i < 3; ++i) { + bool found = false; + unsigned int line; + + if (alu->src[i].sel < 512) + continue; - init_gpr(alu_first); + line = ((alu->src[i].sel - 512) / 32) * 2; - LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { - num_instr++; + for (j = 0; j < count; ++j) { + if (cache_line[j] == line) { + found = true; + break; + } + } + + if (!found) + cache_line[count++] = line; } - if (num_instr == 1) { - check_scalar(bc, alu_first); - - } else { -/* check_read_slots(bc, bc->cf_last->curr_bs_head);*/ - check_vector(bc, alu_first); - LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { - check_vector(bc, alu); + /* This should never actually happen. */ + if (count >= 3) return -ENOMEM; + + for (i = 0; i < 2; ++i) { + if (kcache[i].mode == V_SQ_CF_KCACHE_NOP) { + ++free_lines; + } + } + + /* Filter lines pulled in by previous intructions. Note that this is + * only for the required_lines count, we can't remove these from the + * cache_line array since we may have to start a new ALU clause. 
*/ + for (i = 0, required_lines = count; i < count; ++i) { + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == cache_line[i]) { + --required_lines; + break; + } + } + } + + /* Start a new ALU clause if needed. */ + if (required_lines > free_lines) { + if ((r = r600_bc_add_cf(bc))) { + return r; + } + bc->cf_last->inst = (type << 3); + kcache = bc->cf_last->kcache; + } + + /* Setup the kcache lines. */ + for (i = 0; i < count; ++i) { + bool found = false; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == cache_line[i]) { + found = true; + break; + } + } + + if (found) continue; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_NOP) { + kcache[j].bank = 0; + kcache[j].addr = cache_line[i]; + kcache[j].mode = V_SQ_CF_KCACHE_LOCK_2; + break; + } + } + } + + /* Alter the src operands to refer to the kcache. */ + for (i = 0; i < 3; ++i) { + static const unsigned int base[] = {128, 160, 256, 288}; + unsigned int line; + + if (alu->src[i].sel < 512) + continue; + + alu->src[i].sel -= 512; + line = (alu->src[i].sel / 32) * 2; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == line) { + alu->src[i].sel &= 0x1f; + alu->src[i].sel += base[j]; + break; + } } } + return 0; } @@ -419,62 +1146,100 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (nalu == NULL) return -ENOMEM; memcpy(nalu, alu, sizeof(struct r600_bc_alu)); - nalu->nliteral = 0; + + if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) { + /* check if we could add it anyway */ + if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) && + type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) { + LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) { + if (lalu->predicate) { + bc->force_add_cf = 1; + break; + } + } + } else + bc->force_add_cf = 1; + } /* cf can contains only alu or only vtx or only tex */ - 
if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) || - bc->force_add_cf) { + if (bc->cf_last == NULL || bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(nalu); return r; } - bc->cf_last->inst = (type << 3); } + bc->cf_last->inst = (type << 3); + + /* Setup the kcache for this ALU instruction. This will start a new + * ALU clause if needed. */ + if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) { + free(nalu); + return r; + } + if (!bc->cf_last->curr_bs_head) { bc->cf_last->curr_bs_head = nalu; - LIST_INITHEAD(&nalu->bs_list); - } else { - LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list); - } - /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots) - * worst case */ - if (alu->last && (bc->cf_last->ndw >> 1) >= 120) { - bc->force_add_cf = 1; } /* number of gpr == the last gpr used in any alu */ for (i = 0; i < 3; i++) { - if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) { - bc->ngpr = alu->src[i].sel + 1; - } - /* compute how many literal are needed - * either 2 or 4 literals - */ - if (alu->src[i].sel == 253) { - if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) { - nalu->nliteral = (alu->src[i].chan + 2) & 0x6; - } + if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { + bc->ngpr = nalu->src[i].sel + 1; } + if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) + r600_bc_special_constants(nalu->src[i].value, + &nalu->src[i].sel, &nalu->src[i].neg); } - if (!LIST_IS_EMPTY(&bc->cf_last->alu)) { - lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); - if (!lalu->last && lalu->nliteral > nalu->nliteral) { - nalu->nliteral = lalu->nliteral; - } - } - if (alu->dst.sel >= bc->ngpr) { - bc->ngpr = alu->dst.sel + 1; + if (nalu->dst.sel >= bc->ngpr) { + bc->ngpr = nalu->dst.sel + 1; } LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); /* each alu use 2 dwords */ bc->cf_last->ndw += 2; bc->ndw += 2; - bc->cf_last->kcache0_mode = 2; - /* process cur ALU instructions for bank swizzle */ - if 
(alu->last) { - check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head); + if (nalu->last) { + uint32_t literal[4]; + unsigned nliteral; + struct r600_bc_alu *slots[5]; + r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); + if (r) + return r; + + if (bc->cf_last->prev_bs_head) { + r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head); + if (r) + return r; + } + + if (bc->cf_last->prev_bs_head) { + r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head); + if (r) + return r; + } + + r = check_and_set_bank_swizzle(bc, slots); + if (r) + return r; + + for (i = 0, nliteral = 0; i < 5; i++) { + if (slots[i]) { + r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral); + if (r) + return r; + } + } + bc->cf_last->ndw += align(nliteral, 2); + + /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots) + * worst case */ + if ((bc->cf_last->ndw >> 1) >= 120) { + bc->force_add_cf = 1; + } + + bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head; + bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head; bc->cf_last->curr_bs_head = NULL; } return 0; @@ -485,42 +1250,22 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); } -int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) +static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) { - struct r600_bc_alu *alu; + switch (bc->chiprev) { + case CHIPREV_R600: + return 8; - if (bc->cf_last == NULL) { - return 0; - } - if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { - return 0; - } - /* all same on EG */ - if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE || - bc->cf_last->inst == 
V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) { - return 0; - } - /* same on EG */ - if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) && - (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) || - LIST_IS_EMPTY(&bc->cf_last->alu)) { - R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); - return -EINVAL; - } - alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); - if (!alu->last || !alu->nliteral || alu->literal_added) { - return 0; + case CHIPREV_R700: + return 16; + + case CHIPREV_EVERGREEN: + return 64; + + default: + R600_ERR("Unknown chiprev %d.\n", bc->chiprev); + return 8; } - memcpy(alu->value, value, 4 * 4); - bc->cf_last->ndw += alu->nliteral; - bc->ndw += alu->nliteral; - alu->literal_added = 1; - return 0; } int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) @@ -548,7 +1293,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->ndw / 4) > 7) + if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } @@ -562,6 +1307,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) return -ENOMEM; memcpy(ntex, tex, sizeof(struct r600_bc_tex)); + /* we can't fetch data und use it as texture lookup address in the same TEX clause */ + if (bc->cf_last != NULL && + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + struct r600_bc_tex *ttex; + LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { + if (ttex->dst_gpr == ntex->src_gpr) { + bc->force_add_cf = 1; + break; + } + } + } + /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || @@ -573,11 +1330,17 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) } bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX; } + if (ntex->src_gpr >= 
bc->ngpr) { + bc->ngpr = ntex->src_gpr + 1; + } + if (ntex->dst_gpr >= bc->ngpr) { + bc->ngpr = ntex->dst_gpr + 1; + } LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex); /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->ndw / 4) > 7) + if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } @@ -597,31 +1360,8 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) /* common to all 3 families */ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { - unsigned fetch_resource_start = 0; - - /* check if we are fetch shader */ - /* fetch shader can also access vertex resource, - * first fetch shader resource is at 160 - */ - if (bc->type == -1) { - switch (bc->chiprev) { - /* r600 */ - case CHIPREV_R600: - /* r700 */ - case CHIPREV_R700: - fetch_resource_start = 160; - break; - /* evergreen */ - case CHIPREV_EVERGREEN: - fetch_resource_start = 0; - break; - default: - fprintf(stderr, "%s:%s:%d unknown chiprev %d\n", - __FILE__, __func__, __LINE__, bc->chiprev); - break; - } - } - bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) | + bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | + S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); @@ -635,7 +1375,8 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); - bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); + bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) | + S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; return 0; } @@ -673,8 +1414,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign /* r600 only, r700/eg 
bits in r700_asm.c */ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { - unsigned i; - /* don't replace gpr by pv or ps for destination register */ bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | @@ -705,22 +1444,23 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } - if (alu->last) { - if (alu->nliteral && !alu->literal_added) { - R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); - } - for (i = 0; i < alu->nliteral; i++) { - bc->bytecode[id++] = alu->value[i]; - } - } return 0; } +static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +{ + *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); + *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); +} + /* common for r600/r700 - eg in eg_asm.c */ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) { @@ -729,15 +1469,17 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); + 
S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | - S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); @@ -745,10 +1487,10 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); - bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); + if (bc->chiprev == CHIPREV_R700) + r700_bc_cf_vtx_build(&bc->bytecode[id], cf); + else + r600_bc_cf_vtx_build(&bc->bytecode[id], cf); break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: @@ -756,7 +1498,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); - bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | 
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | @@ -793,8 +1536,10 @@ int r600_bc_build(struct r600_bc *bc) struct r600_bc_alu *alu; struct r600_bc_vtx *vtx; struct r600_bc_tex *tex; + uint32_t literal[4]; + unsigned nliteral; unsigned addr; - int r; + int i, r; if (bc->callstack[0].max > 0) bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; @@ -808,6 +1553,8 @@ int r600_bc_build(struct r600_bc *bc) LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: @@ -854,8 +1601,16 @@ int r600_bc_build(struct r600_bc *bc) return r; switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + nliteral = 0; + memset(literal, 0, sizeof(literal)); LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + if (r) + return r; + r600_bc_alu_adjust_literals(bc, alu, literal, nliteral); switch(bc->chiprev) { case CHIPREV_R600: r = r600_bc_alu_build(bc, alu, addr); @@ -872,7 +1627,11 @@ int r600_bc_build(struct r600_bc *bc) return r; addr += 2; if (alu->last) { - addr += alu->nliteral; + for (i = 0; i < align(nliteral, 2); ++i) { + bc->bytecode[addr++] = literal[i]; + } + nliteral = 0; + memset(literal, 0, sizeof(literal)); } } break; @@ -953,7 +1712,14 @@ void r600_bc_clear(struct r600_bc *bc) void r600_bc_dump(struct r600_bc *bc) { - unsigned i; + struct r600_bc_cf *cf = NULL; + struct r600_bc_alu *alu = NULL; + struct r600_bc_vtx *vtx = NULL; + struct r600_bc_tex *tex = NULL; + + unsigned i, id; + uint32_t literal[4]; + unsigned nliteral; char chip = '6'; switch (bc->chiprev) { @@ 
-968,84 +1734,191 @@ void r600_bc_dump(struct r600_bc *bc) chip = '6'; break; } - fprintf(stderr, "bytecode %d dw -----------------------\n", bc->ndw); + fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n", bc->ndw, bc->ngpr); fprintf(stderr, " %c\n", chip); - for (i = 0; i < bc->ndw; i++) { - fprintf(stderr, "0x%08X\n", bc->bytecode[i]); - } - fprintf(stderr, "--------------------------------------\n"); -} -void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) -{ - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 8 - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, - 0xFFFFFFFF, ve->fetch_shader); -} + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + id = cf->id; -void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) -{ - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = 
S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT((count - 8) - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); + fprintf(stderr, "ADDR:%d ", cf->addr); + fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode); + fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank); + fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank); + id++; + fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", cf->inst); + fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode); + fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr); + fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr); + fprintf(stderr, "COUNT:%d\n", cf->ndw / 2); + break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); + fprintf(stderr, "ADDR:%d\n", cf->addr); + id++; + fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", cf->inst); + fprintf(stderr, "COUNT:%d\n", cf->ndw / 4); + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); + fprintf(stderr, "GPR:%X ", cf->output.gpr); + fprintf(stderr, 
"ELEM_SIZE:%X ", cf->output.elem_size); + fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base); + fprintf(stderr, "TYPE:%X\n", cf->output.type); + id++; + fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); + fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x); + fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y); + fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z); + fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); + fprintf(stderr, "BARRIER:%X ", cf->output.barrier); + fprintf(stderr, "INST:%d ", cf->output.inst); + fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); + fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); + break; + case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: + case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: + case V_SQ_CF_WORD1_SQ_CF_INST_POP: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); + fprintf(stderr, "ADDR:%d\n", cf->cf_addr); + id++; + fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", cf->inst); + fprintf(stderr, "COND:%X ", cf->cond); + fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count); + break; + } + + id = cf->addr; + nliteral = 0; + LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel); + fprintf(stderr, "REL:%d ", alu->src[0].rel); + fprintf(stderr, "CHAN:%d ", alu->src[0].chan); + fprintf(stderr, "NEG:%d) ", alu->src[0].neg); + fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel); + fprintf(stderr, "REL:%d ", alu->src[1].rel); + fprintf(stderr, "CHAN:%d ", alu->src[1].chan); + fprintf(stderr, "NEG:%d) ", alu->src[1].neg); + fprintf(stderr, "LAST:%d)\n", alu->last); + id++; + 
fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' '); + fprintf(stderr, "INST:%d ", alu->inst); + fprintf(stderr, "DST(SEL:%d ", alu->dst.sel); + fprintf(stderr, "CHAN:%d ", alu->dst.chan); + fprintf(stderr, "REL:%d ", alu->dst.rel); + fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp); + fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle); + if (alu->is_op3) { + fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel); + fprintf(stderr, "REL:%d ", alu->src[2].rel); + fprintf(stderr, "CHAN:%d ", alu->src[2].chan); + fprintf(stderr, "NEG:%d)\n", alu->src[2].neg); + } else { + fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs); + fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs); + fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write); + fprintf(stderr, "OMOD:%d ", alu->omod); + fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate); + fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate); + } + + id++; + if (alu->last) { + for (i = 0; i < nliteral; i++, id++) { + float *f = (float*)(bc->bytecode + id); + fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f); + } + id += nliteral & 1; + nliteral = 0; + } + } + + LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", tex->inst); + fprintf(stderr, "RESOURCE_ID:%d ", tex->resource_id); + fprintf(stderr, "SRC(GPR:%d ", tex->src_gpr); + fprintf(stderr, "REL:%d)\n", tex->src_rel); + id++; + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "DST(GPR:%d ", tex->dst_gpr); + fprintf(stderr, "REL:%d ", tex->dst_rel); + fprintf(stderr, "SEL_X:%d ", tex->dst_sel_x); + fprintf(stderr, "SEL_Y:%d ", tex->dst_sel_y); + fprintf(stderr, "SEL_Z:%d ", tex->dst_sel_z); + fprintf(stderr, "SEL_W:%d) ", tex->dst_sel_w); + fprintf(stderr, "LOD_BIAS:%d ", tex->lod_bias); + fprintf(stderr, "COORD_TYPE_X:%d ", tex->coord_type_x); + fprintf(stderr, "COORD_TYPE_Y:%d ", tex->coord_type_y); + fprintf(stderr, "COORD_TYPE_Z:%d ", tex->coord_type_z); 
+ fprintf(stderr, "COORD_TYPE_W:%d\n", tex->coord_type_w); + id++; + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "OFFSET_X:%d ", tex->offset_x); + fprintf(stderr, "OFFSET_Y:%d ", tex->offset_y); + fprintf(stderr, "OFFSET_Z:%d ", tex->offset_z); + fprintf(stderr, "SAMPLER_ID:%d ", tex->sampler_id); + fprintf(stderr, "SRC(SEL_X:%d ", tex->src_sel_x); + fprintf(stderr, "SEL_Y:%d ", tex->src_sel_y); + fprintf(stderr, "SEL_Z:%d ", tex->src_sel_z); + fprintf(stderr, "SEL_W:%d)\n", tex->src_sel_w); + id++; + fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + id++; + } + + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", vtx->inst); + fprintf(stderr, "FETCH_TYPE:%d ", vtx->fetch_type); + fprintf(stderr, "BUFFER_ID:%d\n", vtx->buffer_id); + id++; + /* This assumes that no semantic fetches exist */ + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); + fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); + fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); + fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr); + fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x); + fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y); + fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z); + fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w); + fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields); + fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format); + fprintf(stderr, "NUM:%d ", vtx->num_format_all); + fprintf(stderr, "COMP:%d ", vtx->format_comp_all); + fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); + id++; + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "OFFSET:%d\n", vtx->offset); + //TODO + id++; + fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + id++; + } } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - 
rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, - 0xFFFFFFFF, ve->fetch_shader); + + fprintf(stderr, "--------------------------------------\n"); } static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, @@ -1071,7 +1944,7 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, } switch (desc->channel[i].type) { - /* Half-floats, floats, doubles */ + /* Half-floats, floats, ints */ case UTIL_FORMAT_TYPE_FLOAT: switch (desc->channel[i].size) { case 16: @@ -1083,8 +1956,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16_FLOAT; break; case 3: - *format = FMT_16_16_16_FLOAT; - break; case 4: *format = FMT_16_16_16_16_FLOAT; break; @@ -1124,8 +1995,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_8_8; break; case 3: - // *format = FMT_8_8_8; /* fails piglit draw-vertices test */ - // break; case 4: *format = FMT_8_8_8_8; break; @@ -1140,8 +2009,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16; break; case 3: - // *format = FMT_16_16_16; /* fails piglit draw-vertices test */ - // break; case 4: *format = FMT_16_16_16_16; break; @@ -1184,64 +2051,21 @@ out_unknown: R600_ERR("unsupported vertex format %s\n", util_format_name(pformat)); } -static void r600_bc(unsigned ndw, unsigned chiprev, u32 *bytecode) -{ - unsigned i; - char chip = '6'; - - switch (chiprev) { - case 1: - chip = '7'; - break; - case 2: - chip = 'E'; - break; - case 0: - default: - chip = '6'; - break; - } - fprintf(stderr, "bytecode %d dw -----------------------\n", ndw); - 
fprintf(stderr, " %c\n", chip); - for (i = 0; i < ndw; i++) { - fprintf(stderr, "0x%08X\n", bytecode[i]); - } - fprintf(stderr, "--------------------------------------\n"); -} - int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve) { - unsigned ndw, i; - u32 *bytecode; - unsigned fetch_resource_start = 0, format, num_format, format_comp; + static int dump_shaders = -1; + + struct r600_bc bc; + struct r600_bc_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; - - /* 2 dwords for cf aligned to 4 + 4 dwords per input */ - ndw = 8 + ve->count * 4; - ve->fs_size = ndw * 4; - - /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ - ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0); - if (ve->fetch_shader == NULL) { - return -ENOMEM; - } - - bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); - if (bytecode == NULL) { - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); - return -ENOMEM; - } - - if (rctx->family >= CHIP_CEDAR) { - eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); - } else { - r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); - fetch_resource_start = 160; - } + unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; + unsigned format, num_format, format_comp; + u32 *bytecode; + int i, r; /* vertex elements offset need special handling, if offset is bigger - * than what we can put in fetch instruction then we need to alterate + + * than what we can put in fetch instruction then we need to alterate * the vertex resource offset. In such case in order to simplify code * we will bound one resource per elements. It's a worst case scenario. 
*/ @@ -1252,40 +2076,111 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } } + memset(&bc, 0, sizeof(bc)); + r = r600_bc_init(&bc, r600_get_family(rctx->radeon)); + if (r) + return r; + + for (i = 0; i < ve->count; i++) { + if (elements[i].instance_divisor > 1) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + } + } + for (i = 0; i < ve->count; i++) { unsigned vbuffer_index; - r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp); - desc = util_format_description(ve->hw_format[i]); + r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp); + desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { - R600_ERR("unknown format %d\n", ve->hw_format[i]); - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + r600_bc_clear(&bc); + R600_ERR("unknown format %d\n", ve->elements[i].src_format); return -EINVAL; } /* see above for vbuffer_need_offset explanation */ vbuffer_index = elements[i].vertex_buffer_index; - if (ve->vbuffer_need_offset) { - bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start); - } else { - bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start); + memset(&vtx, 0, sizeof(vtx)); + vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start; + vtx.fetch_type = elements[i].instance_divisor ? 1 : 0; + vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; + vtx.src_sel_x = elements[i].instance_divisor ? 
3 : 0; + vtx.mega_fetch_count = 0x1F; + vtx.dst_gpr = i + 1; + vtx.dst_sel_x = desc->swizzle[0]; + vtx.dst_sel_y = desc->swizzle[1]; + vtx.dst_sel_z = desc->swizzle[2]; + vtx.dst_sel_w = desc->swizzle[3]; + vtx.data_format = format; + vtx.num_format_all = num_format; + vtx.format_comp_all = format_comp; + vtx.srf_mode_all = 1; + vtx.offset = elements[i].src_offset; + + if ((r = r600_bc_add_vtx(&bc, &vtx))) { + r600_bc_clear(&bc); + return r; } - bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) | - S_SQ_VTX_WORD0_SRC_SEL_X(0) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); - bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) | - S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) | - S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) | - S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) | - S_SQ_VTX_WORD1_DATA_FORMAT(format) | - S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) | - S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) | - S_SQ_VTX_WORD1_SRF_MODE_ALL(1) | - S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1); - bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) | - S_SQ_VTX_WORD2_MEGA_FETCH(1); - bytecode[8 + i * 4 + 3] = 0; } + + r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); + + if ((r = r600_bc_build(&bc))) { + r600_bc_clear(&bc); + return r; + } + + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + + if (dump_shaders) { + fprintf(stderr, "--------------------------------------------------------------\n"); + r600_bc_dump(&bc); + fprintf(stderr, "______________________________________________________________\n"); + } + + ve->fs_size = bc.ndw*4; + + /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ + ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, 0); + if (ve->fetch_shader == NULL) { + r600_bc_clear(&bc); + return -ENOMEM; + } + + bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); + if (bytecode 
== NULL) { + r600_bc_clear(&bc); + r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + return -ENOMEM; + } + + memcpy(bytecode, bc.bytecode, ve->fs_size); + r600_bo_unmap(rctx->radeon, ve->fetch_shader); + r600_bc_clear(&bc); + + if (rctx->family >= CHIP_CEDAR) + evergreen_fetch_shader(ve); + else + r600_fetch_shader(ve); + return 0; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index b147f0f5c8..27ea293ebe 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -25,9 +25,6 @@ #include "util/u_double_list.h" -#define NUM_OF_CYCLES 3 -#define NUM_OF_COMPONENTS 4 - struct r600_vertex_element; struct r600_pipe_context; @@ -37,6 +34,7 @@ struct r600_bc_alu_src { unsigned neg; unsigned abs; unsigned rel; + uint32_t value; }; struct r600_bc_alu_dst { @@ -49,19 +47,15 @@ struct r600_bc_alu_dst { struct r600_bc_alu { struct list_head list; - struct list_head bs_list; /* bank swizzle list */ struct r600_bc_alu_src src[3]; struct r600_bc_alu_dst dst; unsigned inst; unsigned last; unsigned is_op3; unsigned predicate; - unsigned nliteral; - unsigned literal_added; unsigned bank_swizzle; unsigned bank_swizzle_force; - u32 value[4]; - int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + unsigned omod; }; struct r600_bc_tex { @@ -109,6 +103,7 @@ struct r600_bc_vtx { unsigned num_format_all; unsigned format_comp_all; unsigned srf_mode_all; + unsigned offset; }; struct r600_bc_output { @@ -122,9 +117,16 @@ struct r600_bc_output { unsigned swizzle_y; unsigned swizzle_z; unsigned swizzle_w; + unsigned burst_count; unsigned barrier; }; +struct r600_bc_kcache { + unsigned bank; + unsigned mode; + unsigned addr; +}; + struct r600_bc_cf { struct list_head list; unsigned inst; @@ -134,18 +136,15 @@ struct r600_bc_cf { unsigned cond; unsigned pop_count; unsigned cf_addr; /* control flow addr */ - unsigned kcache0_mode; - unsigned kcache1_mode; - unsigned kcache0_addr; - unsigned kcache1_addr; - unsigned 
kcache0_bank; - unsigned kcache1_bank; + struct r600_bc_kcache kcache[2]; unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; struct list_head vtx; struct r600_bc_output output; struct r600_bc_alu *curr_bs_head; + struct r600_bc_alu *prev_bs_head; + struct r600_bc_alu *prev2_bs_head; }; #define FC_NONE 0 @@ -191,26 +190,24 @@ struct r600_bc { /* eg_asm.c */ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); -void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); /* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); void r600_bc_clear(struct r600_bc *bc); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); -int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); int r600_bc_build(struct r600_bc *bc); int r600_bc_add_cfinst(struct r600_bc *bc, int inst); int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); +void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); void r600_bc_dump(struct r600_bc *bc); -void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); -void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); /* r700_asm.c */ +void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf); int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); #endif diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 0f04136fb2..04408a5cc8 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -36,6 +36,7 @@ static void 
r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + rctx->blit = true; r600_context_queries_suspend(&rctx->ctx); util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); @@ -53,9 +54,9 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op if (rctx->states[R600_PIPE_STATE_CLIP]) { util_blitter_save_clip(rctx->blitter, &rctx->clip); } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); - - rctx->vertex_elements = NULL; + util_blitter_save_vertex_buffers(rctx->blitter, + rctx->vbuf_mgr->nr_vertex_buffers, + rctx->vbuf_mgr->vertex_buffer); if (op & (R600_CLEAR_SURFACE | R600_COPY)) util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); @@ -76,21 +77,26 @@ static void r600_blitter_end(struct pipe_context *ctx) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_context_queries_resume(&rctx->ctx); + rctx->blit = false; } -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct pipe_surface *zsurf, *cbsurf, surf_tmpl; int level = 0; float depth = 1.0f; - surf_tmpl.format = texture->resource.base.b.format; + + if (!texture->dirty_db) + return; + + surf_tmpl.format = texture->resource.b.b.b.format; surf_tmpl.u.tex.level = level; surf_tmpl.u.tex.first_layer = 0; surf_tmpl.u.tex.last_layer = 0; surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl); + zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl); surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format; surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; @@ -108,8 +114,47 @@ int 
r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te pipe_surface_reference(&zsurf, NULL); pipe_surface_reference(&cbsurf, NULL); + texture->dirty_db = FALSE; +} + +void r600_flush_depth_textures(struct r600_pipe_context *rctx) +{ + unsigned int i; + + if (rctx->blit) return; + + /* FIXME: This handles fragment shader textures only. */ + + for (i = 0; i < rctx->ps_samplers.n_views; ++i) { + struct r600_pipe_sampler_view *view; + struct r600_resource_texture *tex; + + view = rctx->ps_samplers.views[i]; + if (!view) continue; + + tex = (struct r600_resource_texture *)view->base.texture; + if (!tex->depth) + continue; + + if (tex->is_flushing_texture) + continue; - return 0; + r600_blit_uncompress_depth(&rctx->context, tex); + } + + /* also check CB here */ + for (i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + struct r600_resource_texture *tex; + tex = (struct r600_resource_texture *)rctx->framebuffer.cbufs[i]->texture; + + if (!tex->depth) + continue; + + if (tex->is_flushing_texture) + continue; + + r600_blit_uncompress_depth(&rctx->context, tex); + } } static void r600_clear(struct pipe_context *ctx, unsigned buffers, @@ -174,6 +219,52 @@ static void r600_hw_copy_region(struct pipe_context *ctx, r600_blitter_end(ctx); } +struct texture_orig_info { + unsigned format; + unsigned width0; + unsigned height0; +}; + +static void r600_compressed_to_blittable(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex; + unsigned pixsize = util_format_get_blocksize(tex->format); + int new_format; + int new_height, new_width; + + orig->format = tex->format; + orig->width0 = tex->width0; + orig->height0 = tex->height0; + + if (pixsize == 8) + new_format = PIPE_FORMAT_R16G16B16A16_UNORM; /* 64-bit block */ + else + new_format = PIPE_FORMAT_R32G32B32A32_UNORM; /* 128-bit block */ + + new_width = util_format_get_nblocksx(tex->format, orig->width0); + 
new_height = util_format_get_nblocksy(tex->format, orig->height0); + + rtex->force_int_type = true; + tex->width0 = new_width; + tex->height0 = new_height; + tex->format = new_format; + +} + +static void r600_reset_blittable_to_compressed(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex; + rtex->force_int_type = false; + + tex->format = orig->format; + tex->width0 = orig->width0; + tex->height0 = orig->height0; +} + static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dst_level, @@ -182,15 +273,36 @@ static void r600_resource_copy_region(struct pipe_context *ctx, unsigned src_level, const struct pipe_box *src_box) { - boolean is_depth; - /* there is something wrong with depth resource copies at the moment so avoid them for now */ - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - if (is_depth) - util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); - else - r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); + struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src; + struct texture_orig_info orig_info[2]; + boolean restore_orig[2]; + + if (rsrc->depth && !rsrc->is_flushing_texture) + r600_texture_depth_flush(ctx, src, FALSE); + + restore_orig[0] = restore_orig[1] = FALSE; + + if (util_format_is_compressed(src->format)) { + r600_compressed_to_blittable(src, src_level, &orig_info[0]); + restore_orig[0] = TRUE; + } + + if (util_format_is_compressed(dst->format)) { + r600_compressed_to_blittable(dst, dst_level, &orig_info[1]); + restore_orig[1] = TRUE; + /* translate the dst box as well */ + dstx = util_format_get_nblocksx(orig_info[1].format, dstx); + dsty = util_format_get_nblocksy(orig_info[1].format, dsty); + } + + r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, + src, 
src_level, src_box); + + if (restore_orig[0]) + r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]); + + if (restore_orig[1]) + r600_reset_blittable_to_compressed(dst, dst_level, &orig_info[1]); } void r600_init_blit_functions(struct r600_pipe_context *rctx) @@ -200,3 +312,19 @@ void r600_init_blit_functions(struct r600_pipe_context *rctx) rctx->context.clear_depth_stencil = r600_clear_depth_stencil; rctx->context.resource_copy_region = r600_resource_copy_region; } + +void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +{ + struct pipe_box sbox; + + sbox.x = sbox.y = sbox.z = 0; + sbox.width = texture->resource.b.b.b.width0; + sbox.height = texture->resource.b.b.b.height0; + /* XXX that might be wrong */ + sbox.depth = 1; + + r600_hw_copy_region(ctx, (struct pipe_resource *)texture, 0, + 0, 0, 0, + (struct pipe_resource *)texture->flushed_depth_texture, 0, + &sbox); +} diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 03a61a3213..6ced719c8f 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -29,83 +29,50 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> -#include <util/u_upload_mgr.h> +#include "util/u_upload_mgr.h" + #include "state_tracker/drm_driver.h" + #include <xf86drm.h> #include "radeon_drm.h" + #include "r600.h" #include "r600_pipe.h" -extern struct u_resource_vtbl r600_buffer_vtbl; - - -struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, - const struct pipe_resource *templ) -{ - struct r600_resource_buffer *rbuffer; - struct r600_bo *bo; - /* XXX We probably want a different alignment for buffers and textures. 
*/ - unsigned alignment = 4096; - - rbuffer = CALLOC_STRUCT(r600_resource_buffer); - if (rbuffer == NULL) - return NULL; - - rbuffer->magic = R600_BUFFER_MAGIC; - rbuffer->user_buffer = NULL; - rbuffer->num_ranges = 0; - rbuffer->r.base.b = *templ; - pipe_reference_init(&rbuffer->r.base.b.reference, 1); - rbuffer->r.base.b.screen = screen; - rbuffer->r.base.vtbl = &r600_buffer_vtbl; - rbuffer->r.size = rbuffer->r.base.b.width0; - bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage); - if (bo == NULL) { - FREE(rbuffer); - return NULL; - } - rbuffer->r.bo = bo; - return &rbuffer->r.base.b; -} - -struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, - void *ptr, unsigned bytes, - unsigned bind) -{ - struct r600_resource_buffer *rbuffer; - - rbuffer = CALLOC_STRUCT(r600_resource_buffer); - if (rbuffer == NULL) - return NULL; - - rbuffer->magic = R600_BUFFER_MAGIC; - pipe_reference_init(&rbuffer->r.base.b.reference, 1); - rbuffer->r.base.vtbl = &r600_buffer_vtbl; - rbuffer->r.base.b.screen = screen; - rbuffer->r.base.b.target = PIPE_BUFFER; - rbuffer->r.base.b.format = PIPE_FORMAT_R8_UNORM; - rbuffer->r.base.b.usage = PIPE_USAGE_IMMUTABLE; - rbuffer->r.base.b.bind = bind; - rbuffer->r.base.b.width0 = bytes; - rbuffer->r.base.b.height0 = 1; - rbuffer->r.base.b.depth0 = 1; - rbuffer->r.base.b.array_size = 1; - rbuffer->r.base.b.flags = 0; - rbuffer->num_ranges = 0; - rbuffer->r.bo = NULL; - rbuffer->user_buffer = ptr; - return &rbuffer->r.base.b; -} - static void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { + struct r600_screen *rscreen = (struct r600_screen*)screen; struct r600_resource_buffer *rbuffer = r600_buffer(buf); if (rbuffer->r.bo) { r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); } - FREE(rbuffer); + rbuffer->r.bo = NULL; + util_slab_free(&rscreen->pool_buffers, rbuffer); +} + +static struct pipe_transfer 
*r600_get_transfer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx; + struct pipe_transfer *transfer = util_slab_alloc(&rctx->pool_transfers); + + transfer->resource = resource; + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; + transfer->stride = 0; + transfer->layer_stride = 0; + transfer->data = NULL; + + /* Note strides are zero, this is ok for buffers, but not for + * textures 2d & higher at least. + */ + return transfer; } static void *r600_buffer_transfer_map(struct pipe_context *pipe, @@ -114,29 +81,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); int write = 0; uint8_t *data; - int i; - boolean flush = FALSE; - - if (rbuffer->user_buffer) - return (uint8_t*)rbuffer->user_buffer + transfer->box.x; - - if (transfer->usage & PIPE_TRANSFER_DISCARD) { - for (i = 0; i < rbuffer->num_ranges; i++) { - if ((transfer->box.x >= rbuffer->ranges[i].start) && - (transfer->box.x < rbuffer->ranges[i].end)) - flush = TRUE; - - if (flush) { - r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL); - rbuffer->num_ranges = 0; - rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys, - rbuffer->r.base.b.width0, 0, - rbuffer->r.base.b.bind, - rbuffer->r.base.b.usage); - break; - } - } - } + + if (rbuffer->r.b.user_ptr) + return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x; + if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) { /* FIXME */ } @@ -155,44 +103,122 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + if (rbuffer->r.b.user_ptr) + return; + if (rbuffer->r.bo) r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); } static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, - 
struct pipe_transfer *transfer, - const struct pipe_box *box) + struct pipe_transfer *transfer, + const struct pipe_box *box) { - struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - unsigned i; - unsigned offset = transfer->box.x + box->x; - unsigned length = box->width; +} - assert(box->x + box->width <= transfer->box.width); +static void r600_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx; + util_slab_free(&rctx->pool_transfers, transfer); +} - if (rbuffer->user_buffer) - return; +static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) +{ + struct radeon *ws = (struct radeon*)pipe->winsys; + struct r600_resource_buffer *rbuffer = r600_buffer(resource); + uint8_t *map = NULL; - /* mark the range as used */ - for(i = 0; i < rbuffer->num_ranges; ++i) { - if(offset <= rbuffer->ranges[i].end && rbuffer->ranges[i].start <= (offset+box->width)) { - rbuffer->ranges[i].start = MIN2(rbuffer->ranges[i].start, offset); - rbuffer->ranges[i].end = MAX2(rbuffer->ranges[i].end, (offset+length)); - return; - } - } + assert(rbuffer->r.b.user_ptr == NULL); + + map = r600_bo_map(ws, rbuffer->r.bo, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage, + pipe); + + memcpy(map + box->x, data, box->width); - rbuffer->ranges[rbuffer->num_ranges].start = offset; - rbuffer->ranges[rbuffer->num_ranges].end = offset+length; - rbuffer->num_ranges++; + if (rbuffer->r.bo) + r600_bo_unmap(ws, rbuffer->r.bo); } -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer) +static const struct u_resource_vtbl r600_buffer_vtbl = { - /* FIXME */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + 
u_default_resource_get_handle, /* get_handle */ + r600_buffer_destroy, /* resource_destroy */ + r600_get_transfer, /* get_transfer */ + r600_transfer_destroy, /* transfer_destroy */ + r600_buffer_transfer_map, /* transfer_map */ + r600_buffer_transfer_flush_region, /* transfer_flush_region */ + r600_buffer_transfer_unmap, /* transfer_unmap */ + r600_buffer_transfer_inline_write /* transfer_inline_write */ +}; + +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct r600_screen *rscreen = (struct r600_screen*)screen; + struct r600_resource_buffer *rbuffer; + struct r600_bo *bo; + /* XXX We probably want a different alignment for buffers and textures. */ + unsigned alignment = 4096; + + rbuffer = util_slab_alloc(&rscreen->pool_buffers); + + rbuffer->magic = R600_BUFFER_MAGIC; + rbuffer->r.b.b.b = *templ; + pipe_reference_init(&rbuffer->r.b.b.b.reference, 1); + rbuffer->r.b.b.b.screen = screen; + rbuffer->r.b.b.vtbl = &r600_buffer_vtbl; + rbuffer->r.b.user_ptr = NULL; + rbuffer->r.size = rbuffer->r.b.b.b.width0; + rbuffer->r.bo_size = rbuffer->r.size; + + bo = r600_bo((struct radeon*)screen->winsys, + rbuffer->r.b.b.b.width0, + alignment, rbuffer->r.b.b.b.bind, + rbuffer->r.b.b.b.usage); + + if (bo == NULL) { + FREE(rbuffer); + return NULL; + } + rbuffer->r.bo = bo; + return &rbuffer->r.b.b.b; +} + +struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, + void *ptr, unsigned bytes, + unsigned bind) +{ + struct r600_screen *rscreen = (struct r600_screen*)screen; + struct r600_resource_buffer *rbuffer; + + rbuffer = util_slab_alloc(&rscreen->pool_buffers); + + rbuffer->magic = R600_BUFFER_MAGIC; + pipe_reference_init(&rbuffer->r.b.b.b.reference, 1); + rbuffer->r.b.b.vtbl = &r600_buffer_vtbl; + rbuffer->r.b.b.b.screen = screen; + rbuffer->r.b.b.b.target = PIPE_BUFFER; + rbuffer->r.b.b.b.format = PIPE_FORMAT_R8_UNORM; + rbuffer->r.b.b.b.usage = PIPE_USAGE_IMMUTABLE; + rbuffer->r.b.b.b.bind 
= bind; + rbuffer->r.b.b.b.width0 = bytes; + rbuffer->r.b.b.b.height0 = 1; + rbuffer->r.b.b.b.depth0 = 1; + rbuffer->r.b.b.b.array_size = 1; + rbuffer->r.b.b.b.flags = 0; + rbuffer->r.b.user_ptr = ptr; + rbuffer->r.bo = NULL; + rbuffer->r.bo_size = 0; + return &rbuffer->r.b.b.b; } struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, @@ -213,82 +239,39 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, return NULL; } - pipe_reference_init(&rbuffer->base.b.reference, 1); - rbuffer->base.b.target = PIPE_BUFFER; - rbuffer->base.b.screen = screen; - rbuffer->base.vtbl = &r600_buffer_vtbl; + pipe_reference_init(&rbuffer->b.b.b.reference, 1); + rbuffer->b.b.b.target = PIPE_BUFFER; + rbuffer->b.b.b.screen = screen; + rbuffer->b.b.vtbl = &r600_buffer_vtbl; rbuffer->bo = bo; - return &rbuffer->base.b; + return &rbuffer->b.b.b; } -struct u_resource_vtbl r600_buffer_vtbl = +void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) { - u_default_resource_get_handle, /* get_handle */ - r600_buffer_destroy, /* resource_destroy */ - r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ - r600_buffer_transfer_map, /* transfer_map */ - r600_buffer_transfer_flush_region, /* transfer_flush_region */ - r600_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ -}; + struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); + boolean flushed; + + u_upload_data(rctx->vbuf_mgr->uploader, 0, + draw->info.count * draw->index_size, + rbuffer->r.b.user_ptr, + &draw->index_buffer_offset, + &draw->index_buffer, &flushed); +} -int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) +void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, + uint32_t *const_offset) { - struct 
pipe_resource *upload_buffer = NULL; - unsigned index_offset = draw->index_buffer_offset; - int ret = 0; - - if (r600_buffer_is_user_buffer(draw->index_buffer)) { - ret = u_upload_buffer(rctx->upload_ib, - index_offset, - draw->count * draw->index_size, - draw->index_buffer, - &index_offset, - &upload_buffer); - if (ret) { - goto done; - } - draw->index_buffer_offset = index_offset; - - /* Transfer ownership. */ - pipe_resource_reference(&draw->index_buffer, upload_buffer); - pipe_resource_reference(&upload_buffer, NULL); - } + if ((*rbuffer)->r.b.user_ptr) { + uint8_t *ptr = (*rbuffer)->r.b.user_ptr; + unsigned size = (*rbuffer)->r.b.b.b.width0; + boolean flushed; -done: - return ret; -} + *rbuffer = NULL; -int r600_upload_user_buffers(struct r600_pipe_context *rctx) -{ - enum pipe_error ret = PIPE_OK; - int i, nr; - - nr = rctx->vertex_elements->count; - nr = rctx->nvertex_buffer; - - for (i = 0; i < nr; i++) { -// struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index]; - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - if (r600_buffer_is_user_buffer(vb->buffer)) { - struct pipe_resource *upload_buffer = NULL; - unsigned offset = 0; /*vb->buffer_offset * 4;*/ - unsigned size = vb->buffer->width0; - unsigned upload_offset; - ret = u_upload_buffer(rctx->upload_vb, - offset, size, - vb->buffer, - &upload_offset, &upload_buffer); - if (ret) - return ret; - - pipe_resource_reference(&vb->buffer, NULL); - vb->buffer = upload_buffer; - vb->buffer_offset = upload_offset; - } + u_upload_data(rctx->vbuf_mgr->uploader, 0, size, ptr, const_offset, + (struct pipe_resource**)rbuffer, &flushed); + } else { + *const_offset = 0; } - return ret; } diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 2ee0c83e5d..a85d0bbf1e 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -330,10 +330,14 @@ #define 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099 /* TODO Fill in more ALU */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT 0x0000009B +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT 0x0000009C +/* TODO Fill in more ALU */ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4 0x000000C1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6 diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 6842571044..0e28bda6eb 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -30,12 +30,13 @@ #include <tgsi/tgsi_util.h> #include <util/u_blitter.h> #include <util/u_double_list.h> +#include <util/u_format_s3tc.h> #include <util/u_transfer.h> #include <util/u_surface.h> #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> -#include <util/u_upload_mgr.h> +#include "util/u_upload_mgr.h" #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -47,7 +48,7 @@ /* * pipe_context */ -static void r600_flush(struct pipe_context *ctx, unsigned flags, +static void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -59,9 +60,6 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, if (!rctx->ctx.pm4_cdwords) return; - u_upload_flush(rctx->upload_vb); - u_upload_flush(rctx->upload_ib); - #if 0 sprintf(dname, "gallium-%08d.bof", dc); if (dc < 20) { @@ -71,6 +69,30 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, dc++; #endif 
r600_context_flush(&rctx->ctx); + + /* XXX This shouldn't be really necessary, but removing it breaks some tests. + * Needless buffer reallocations may significantly increase memory consumption, + * so getting rid of this call is important. */ + u_upload_flush(rctx->vbuf_mgr->uploader); +} + +static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) +{ + pipe_mutex_lock(rscreen->mutex_num_contexts); + if (diff > 0) { + rscreen->num_contexts++; + + if (rscreen->num_contexts > 1) + util_slab_set_thread_safety(&rscreen->pool_buffers, + UTIL_SLAB_MULTITHREADED); + } else { + rscreen->num_contexts--; + + if (rscreen->num_contexts <= 1) + util_slab_set_thread_safety(&rscreen->pool_buffers, + UTIL_SLAB_SINGLETHREADED); + } + pipe_mutex_unlock(rscreen->mutex_num_contexts); } static void r600_destroy_context(struct pipe_context *context) @@ -79,8 +101,6 @@ static void r600_destroy_context(struct pipe_context *context) rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush); - r600_end_vertex_translate(rctx); - r600_context_fini(&rctx->ctx); util_blitter_destroy(rctx->blitter); @@ -89,14 +109,11 @@ static void r600_destroy_context(struct pipe_context *context) free(rctx->states[i]); } - u_upload_destroy(rctx->upload_vb); - u_upload_destroy(rctx->upload_ib); + u_vbuf_mgr_destroy(rctx->vbuf_mgr); + util_slab_destroy(&rctx->pool_transfers); - if (rctx->tran.translate_cache) - translate_cache_destroy(rctx->tran.translate_cache); + r600_update_num_contexts(rctx->screen, -1); - FREE(rctx->ps_resource); - FREE(rctx->vs_resource); FREE(rctx); } @@ -108,6 +125,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void if (rctx == NULL) return NULL; + + r600_update_num_contexts(rscreen, 1); + rctx->context.winsys = rscreen->screen.winsys; rctx->context.screen = screen; rctx->context.priv = priv; @@ -123,6 +143,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void 
r600_init_query_functions(rctx); r600_init_context_resource_functions(rctx); r600_init_surface_functions(rctx); + rctx->context.draw_vbo = r600_draw_vbo; switch (r600_get_family(rctx->radeon)) { case CHIP_R600: @@ -137,7 +158,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_RV730: case CHIP_RV710: case CHIP_RV740: - rctx->context.draw_vbo = r600_draw_vbo; r600_init_state_functions(rctx); if (r600_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); @@ -151,7 +171,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: - rctx->context.draw_vbo = evergreen_draw; + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); @@ -165,41 +187,23 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - if (rctx->upload_ib == NULL) { - r600_destroy_context(&rctx->context); - return NULL; - } + util_slab_create(&rctx->pool_transfers, + sizeof(struct pipe_transfer), 64, + UTIL_SLAB_SINGLETHREADED); - rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (rctx->upload_vb == NULL) { + rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 256, + PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER, + U_VERTEX_FETCH_DWORD_ALIGNED); + if (!rctx->vbuf_mgr) { r600_destroy_context(&rctx->context); return NULL; } rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) { - FREE(rctx); - return NULL; - } - - rctx->tran.translate_cache = translate_cache_create(); - if (rctx->tran.translate_cache == NULL) { - FREE(rctx); - return NULL; - } - - rctx->vs_resource = 
CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); - if (!rctx->vs_resource) { - FREE(rctx); - return NULL; - } - - rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); - if (!rctx->ps_resource) { - FREE(rctx); + r600_destroy_context(&rctx->context); return NULL; } @@ -209,8 +213,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void else rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); - r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth; - return &rctx->context; } @@ -243,6 +245,9 @@ static const char *r600_get_family_name(enum radeon_family family) case CHIP_CYPRESS: return "AMD CYPRESS"; case CHIP_HEMLOCK: return "AMD HEMLOCK"; case CHIP_PALM: return "AMD PALM"; + case CHIP_BARTS: return "AMD BARTS"; + case CHIP_TURKS: return "AMD TURKS"; + case CHIP_CAICOS: return "AMD CAICOS"; default: return "AMD unknown"; } } @@ -275,18 +280,32 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_SM3: case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; + case PIPE_CAP_INDEP_BLEND_ENABLE: + /* R600 doesn't support per-MRT blends */ + if (family == CHIP_R600) + return 0; + else + return 1; /* Unsupported features (boolean caps). */ - case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ - return 0; + /* R600 doesn't support per-MRT blends */ + if (family == CHIP_R600) + return 0; + else + return 0; + + case PIPE_CAP_ARRAY_TEXTURES: + /* fix once the CS checker upstream is fixed */ + return debug_get_bool_option("R600_ARRAY_TEXTURE", FALSE); /* Texturing. 
*/ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: @@ -316,6 +335,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; + /* Timer queries, present when the clock frequency is non zero. */ + case PIPE_CAP_TIMER_QUERY: + return r600_get_clock_crystal_freq(rscreen->radeon) != 0; + default: R600_ERR("r600: unknown param %d\n", param); return 0; @@ -380,9 +403,9 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_ADDRS: return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */ case PIPE_SHADER_CAP_MAX_CONSTS: - return 256; //max native parameters + return R600_MAX_CONST_BUFFER_SIZE; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; + return R600_MAX_CONST_BUFFERS; case PIPE_SHADER_CAP_MAX_PREDS: return 0; /* FIXME */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: @@ -403,8 +426,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned usage, - unsigned geom_flags) + unsigned usage) { unsigned retval = 0; if (target >= PIPE_MAX_TEXTURE_TYPES) { @@ -417,7 +439,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, return FALSE; if ((usage & PIPE_BIND_SAMPLER_VIEW) && - r600_is_sampler_format_supported(format)) { + r600_is_sampler_format_supported(screen, format)) { retval |= PIPE_BIND_SAMPLER_VIEW; } @@ -438,9 +460,14 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, retval |= PIPE_BIND_DEPTH_STENCIL; } - if ((usage & PIPE_BIND_VERTEX_BUFFER) && - r600_is_vertex_format_supported(format)) - retval |= PIPE_BIND_VERTEX_BUFFER; + if (usage & PIPE_BIND_VERTEX_BUFFER) { + struct r600_screen *rscreen = (struct r600_screen *)screen; + enum radeon_family family = r600_get_family(rscreen->radeon); + + if (r600_is_vertex_format_supported(format, family)) { + retval |= 
PIPE_BIND_VERTEX_BUFFER; + } + } if (usage & PIPE_BIND_TRANSFER_READ) retval |= PIPE_BIND_TRANSFER_READ; @@ -459,6 +486,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) radeon_decref(rscreen->radeon); + util_slab_destroy(&rscreen->pool_buffers); + pipe_mutex_destroy(rscreen->mutex_num_contexts); FREE(rscreen); } @@ -485,6 +514,13 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) r600_init_screen_resource_functions(&rscreen->screen); rscreen->tiling_info = r600_get_tiling_info(radeon); + util_format_s3tc_init(); + + util_slab_create(&rscreen->pool_buffers, + sizeof(struct r600_resource_buffer), 64, + UTIL_SLAB_SINGLETHREADED); + + pipe_mutex_init(rscreen->mutex_num_contexts); return &rscreen->screen; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 485f42166d..396801e4a4 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -30,12 +30,16 @@ #include <pipe/p_screen.h> #include <pipe/p_context.h> #include <util/u_math.h> -#include "translate/translate_cache.h" +#include "util/u_slab.h" +#include "util/u_vbuf_mgr.h" #include "r600.h" #include "r600_public.h" #include "r600_shader.h" #include "r600_resource.h" +#define R600_MAX_CONST_BUFFERS 1 +#define R600_MAX_CONST_BUFFER_SIZE 4096 + enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, @@ -62,6 +66,11 @@ struct r600_screen { struct pipe_screen screen; struct radeon *radeon; struct r600_tiling_info *tiling_info; + struct util_slab_mempool pool_buffers; + unsigned num_contexts; + + /* for thread-safe write accessing to num_contexts */ + pipe_mutex mutex_num_contexts; }; struct r600_pipe_sampler_view { @@ -86,9 +95,7 @@ struct r600_vertex_element { unsigned count; struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; - enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; - unsigned hw_format_size[PIPE_MAX_ATTRIBS]; - boolean incompatible_layout; + struct u_vbuf_mgr_elements 
*vmgr_elements; struct r600_bo *fetch_shader; unsigned fs_size; struct r600_pipe_state rstate; @@ -111,30 +118,18 @@ struct r600_pipe_shader { #define NUM_TEX_UNITS 16 struct r600_textures_info { - struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; + struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; unsigned n_views; void *samplers[NUM_TEX_UNITS]; unsigned n_samplers; }; -/* vertex buffer translation context, used to translate vertex input that - * hw doesn't natively support, so far only FLOAT64 is unsupported. - */ -struct r600_translate_context { - /* Translate cache for incompatible vertex offset/stride/format fallback. */ - struct translate_cache *translate_cache; - /* The vertex buffer slot containing the translated buffer. */ - unsigned vb_slot; - void *new_velems; -}; - #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; - struct pipe_framebuffer_state *pframebuffer; unsigned family; void *custom_dsa_flush; struct r600_screen *screen; @@ -142,43 +137,35 @@ struct r600_pipe_context { struct r600_pipe_state *states[R600_PIPE_NSTATES]; struct r600_context ctx; struct r600_vertex_element *vertex_elements; + struct r600_pipe_state fs_resource[PIPE_MAX_ATTRIBS]; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nvertex_buffer; unsigned cb_target_mask; /* for saving when using blitter */ struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_clip_state clip; - unsigned nvs_resource; - struct r600_pipe_state *vs_resource; - struct r600_pipe_state *ps_resource; struct r600_pipe_state config; struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; struct r600_pipe_state vs_const_buffer; + struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_state ps_const_buffer; + 
struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_rasterizer *rasterizer; /* shader information */ unsigned sprite_coord_enable; bool flatshade; - struct u_upload_mgr *upload_vb; - struct u_upload_mgr *upload_ib; - unsigned any_user_vbs; struct r600_textures_info ps_samplers; - unsigned vb_max_index; - struct r600_translate_context tran; + + struct u_vbuf_mgr *vbuf_mgr; + struct util_slab_mempool pool_transfers; + bool blit; }; struct r600_drawl { + struct pipe_draw_info info; struct pipe_context *ctx; - unsigned mode; - unsigned min_index; - unsigned max_index; - unsigned index_bias; - unsigned start; - unsigned count; unsigned index_size; unsigned index_buffer_offset; struct pipe_resource *index_buffer; @@ -187,16 +174,21 @@ struct r600_drawl { /* evergreen_state.c */ void evergreen_init_state_functions(struct r600_pipe_context *rctx); void evergreen_init_config(struct r600_pipe_context *rctx); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info); void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void evergreen_fetch_shader(struct r600_vertex_element *ve); void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); -void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx); +void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_blit_push_depth(struct pipe_context *ctx, struct 
r600_resource_texture *texture); +void r600_flush_depth_textures(struct r600_pipe_context *rctx); /* r600_buffer.c */ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, @@ -204,13 +196,9 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, void *ptr, unsigned bytes, unsigned bind); -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer); struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); -int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); -int r600_upload_user_buffers(struct r600_pipe_context *rctx); +void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); /* r600_query.c */ void r600_init_query_functions(struct r600_pipe_context *rctx); @@ -219,7 +207,6 @@ void r600_init_query_functions(struct r600_pipe_context *rctx); void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ -int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, @@ -227,11 +214,17 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, /* r600_state.c */ void r600_init_state_functions(struct r600_pipe_context *rctx); -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); +void r600_spi_update(struct r600_pipe_context *rctx); void r600_init_config(struct r600_pipe_context *rctx); +void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader 
*shader); +void r600_fetch_shader(struct r600_vertex_element *ve); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); void r600_polygon_offset_update(struct r600_pipe_context *rctx); -void r600_vertex_buffer_update(struct r600_pipe_context *rctx); +void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); /* r600_helper.h */ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); @@ -239,15 +232,13 @@ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); /* r600_texture.c */ void r600_init_screen_texture_functions(struct pipe_screen *screen); void r600_init_surface_functions(struct r600_pipe_context *r600); -uint32_t r600_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view, +uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format, + const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p); unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned layer); /* r600_translate.c */ -void r600_begin_vertex_translate(struct r600_pipe_context *rctx); -void r600_end_vertex_translate(struct r600_pipe_context *rctx); void r600_translate_index_buffer(struct r600_pipe_context *r600, struct pipe_resource **index_buffer, unsigned *index_size, @@ -270,13 +261,16 @@ void r600_sampler_view_destroy(struct pipe_context *ctx, void r600_bind_state(struct pipe_context *ctx, void *state); void r600_delete_state(struct pipe_context *ctx, void *state); void r600_bind_vertex_elements(struct pipe_context *ctx, void *state); - void *r600_create_shader_state(struct pipe_context *ctx, const struct pipe_shader_state *state); void r600_bind_ps_shader(struct pipe_context *ctx, void *state); void r600_bind_vs_shader(struct pipe_context *ctx, void *state); void r600_delete_ps_shader(struct pipe_context *ctx, void *state); void r600_delete_vs_shader(struct 
pipe_context *ctx, void *state); +void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer); +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); + /* * common helpers */ diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 726668260c..181ea3f9e4 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -21,6 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r600_pipe.h" +#include "r600d.h" static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) { @@ -61,11 +62,35 @@ static boolean r600_get_query_result(struct pipe_context *ctx, struct r600_query *rquery = (struct r600_query *)query; if (rquery->num_results) { - ctx->flush(ctx, 0, NULL); + ctx->flush(ctx, NULL); } return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); } +static void r600_render_condition(struct pipe_context *ctx, + struct pipe_query *query, + uint mode) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + int wait_flag = 0; + + if (!query) { + rctx->ctx.predicate_drawing = false; + r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1); + return; + } + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + wait_flag = 1; + } + + rctx->ctx.predicate_drawing = true; + r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag); + +} + void r600_init_query_functions(struct r600_pipe_context *rctx) { rctx->context.create_query = r600_create_query; @@ -73,4 +98,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx) rctx->context.begin_query = r600_begin_query; rctx->context.end_query = r600_end_query; rctx->context.get_query_result = r600_get_query_result; + + if (r600_get_num_backends(rctx->screen->radeon) > 0) 
+ rctx->context.render_condition = r600_render_condition; } diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index 207642ccfa..f3ab3613c8 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -61,5 +61,4 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600) r600->context.transfer_unmap = u_transfer_unmap_vtbl; r600->context.transfer_destroy = u_transfer_destroy_vtbl; r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; } diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 25aa84682c..836e7491f1 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -24,6 +24,7 @@ #define R600_RESOURCE_H #include "util/u_transfer.h" +#include "util/u_vbuf_mgr.h" /* flag to indicate a resource is to be used as a transfer so should not be tiled */ #define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV @@ -43,25 +44,45 @@ struct r600_transfer { * underlying implementations. 
*/ struct r600_resource { - struct u_resource base; + struct u_vbuf_resource b; struct r600_bo *bo; u32 size; + unsigned bo_size; }; struct r600_resource_texture { struct r600_resource resource; unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; - unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; /* transfer */ + unsigned pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */ unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS]; unsigned pitch_override; unsigned size; - unsigned tiled; unsigned tile_type; unsigned depth; - unsigned dirty; - struct r600_resource_texture *flushed_depth_texture; + unsigned dirty_db; + struct r600_resource_texture *flushed_depth_texture; + boolean is_flushing_texture; + + /* on some cards we have to use integer 64/128-bit types + for s3tc blits, do this until gallium grows int formats */ + boolean force_int_type; +}; + +#define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED) + +#define R600_BUFFER_MAGIC 0xabcd1600 + +/* XXX this could be removed */ +struct r600_resource_buffer { + struct r600_resource r; + uint32_t magic; +}; + +struct r600_surface { + struct pipe_surface base; + unsigned aligned_height; }; void r600_init_screen_resource_functions(struct pipe_screen *screen); @@ -73,41 +94,17 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *base, struct winsys_handle *whandle); -#define R600_BUFFER_MAGIC 0xabcd1600 -#define R600_BUFFER_MAX_RANGES 32 - -struct r600_buffer_range { - uint32_t start; - uint32_t end; -}; - -struct r600_resource_buffer { - struct r600_resource r; - uint32_t magic; - void *user_buffer; - struct r600_buffer_range ranges[R600_BUFFER_MAX_RANGES]; - unsigned num_ranges; -}; - /* r600_buffer */ static INLINE struct 
r600_resource_buffer *r600_buffer(struct pipe_resource *buffer) { if (buffer) { assert(((struct r600_resource_buffer *)buffer)->magic == R600_BUFFER_MAGIC); return (struct r600_resource_buffer *)buffer; - } - return NULL; -} - -static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) -{ - return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; + } + return NULL; } -int r600_texture_depth_flush(struct pipe_context *ctx, - struct pipe_resource *texture); - -extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); +int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture, boolean just_create); /* r600_texture.c texture transfer functions. */ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, @@ -122,9 +119,8 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); -struct r600_surface { - struct pipe_surface base; - unsigned aligned_height; -}; +struct r600_pipe_context; + +void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, uint32_t *offset); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d6455023a3..e7285d624e 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -28,60 +28,12 @@ #include "r600_pipe.h" #include "r600_asm.h" #include "r600_sq.h" +#include "r600_formats.h" #include "r600_opcodes.h" #include "r600d.h" #include <stdio.h> #include <errno.h> -static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned spi_vs_out_id[10]; - unsigned i, tmp; - - /* clear previous register */ - rstate->nregs = 0; - - /* so far never got proper semantic id from 
tgsi */ - /* FIXME better to move this in config things so they get emited - * only one time per cs - */ - for (i = 0; i < 10; i++) { - spi_vs_out_id[i] = 0; - } - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - spi_vs_out_id[i / 4] |= tmp; - } - for (i = 0; i < 10; i++) { - r600_pipe_state_add_reg(rstate, - R_028614_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); - } - - r600_pipe_state_add_reg(rstate, - R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028868_SQ_PGM_RESOURCES_VS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_0288D0_SQ_PGM_CF_OFFSET_VS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028858_SQ_PGM_START_VS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); - - r600_pipe_state_add_reg(rstate, - R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, - 0xFFFFFFFF, NULL); - -} - int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id) { @@ -96,98 +48,7 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, return 0; } -static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; - int pos_index = -1, face_index = -1; - - rstate->nregs = 0; - - for (i = 0; i < rshader->ninput; i++) { - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - pos_index = i; - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - face_index = i; - } - - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_Z_EXPORT_ENABLE(1), - S_02880C_Z_EXPORT_ENABLE(1), NULL); - if 
(rshader->output[i].name == TGSI_SEMANTIC_STENCIL) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_STENCIL_REF_EXPORT_ENABLE(1), - S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - num_cout++; - } - } - exports_ps |= S_028854_EXPORT_COLORS(num_cout); - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - - spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - spi_input_z = 0; - if (pos_index != -1) { - spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | - S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | - S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | - S_0286CC_BARYC_SAMPLE_CNTL(1)); - spi_input_z |= 1; - } - - spi_ps_in_control_1 = 0; - if (face_index != -1) { - spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | - S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); - } - - r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028840_SQ_PGM_START_PS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, - R_028850_SQ_PGM_RESOURCES_PS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028854_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_0288CC_SQ_PGM_CF_OFFSET_PS, - 0x00000000, 0xFFFFFFFF, NULL); - - if 
(rshader->uses_kill) { - /* only set some bits here, the other bits are set in the dsa state */ - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_KILL_ENABLE(1), - S_02880C_KILL_ENABLE(1), NULL); - } - r600_pipe_state_add_reg(rstate, - R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, - 0xFFFFFFFF, NULL); -} - -int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) +static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_shader *rshader = &shader->shader; @@ -225,14 +86,23 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) return 0; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); + int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) { + static int dump_shaders = -1; struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; int r; -//fprintf(stderr, "--------------------------------------------------------------\n"); -//tgsi_dump(tokens, 0); + /* Would like some magic "get_bool_option_once" routine. 
+ */ + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + + if (dump_shaders) { + fprintf(stderr, "--------------------------------------------------------------\n"); + tgsi_dump(tokens, 0); + } shader->shader.family = r600_get_family(rctx->radeon); r = r600_shader_from_tgsi(tokens, &shader->shader); if (r) { @@ -244,8 +114,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s R600_ERR("building bytecode failed !\n"); return r; } -//r600_bc_dump(&shader->shader.bc); -//fprintf(stderr, "______________________________________________________________\n"); + if (dump_shaders) { + r600_bc_dump(&shader->shader.bc); + fprintf(stderr, "______________________________________________________________\n"); + } return r600_pipe_shader(ctx, shader); } @@ -262,6 +134,15 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader */ struct r600_shader_tgsi_instruction; +struct r600_shader_src { + unsigned sel; + unsigned swizzle[4]; + unsigned neg; + unsigned abs; + unsigned rel; + uint32_t value[4]; +}; + struct r600_shader_ctx { struct tgsi_shader_info info; struct tgsi_parse_context parse; @@ -269,10 +150,11 @@ struct r600_shader_ctx { unsigned type; unsigned file_offset[TGSI_FILE_COUNT]; unsigned temp_reg; + unsigned ar_reg; struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; - u32 value[4]; + struct r600_shader_src src[3]; u32 *literals; u32 nliterals; u32 max_driver_temp_used; @@ -391,6 +273,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) { struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; unsigned i; + int r; switch (d->Declaration.File) { case TGSI_FILE_INPUT: @@ -422,6 +305,26 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_SAMPLER: case TGSI_FILE_ADDRESS: break; + + case TGSI_FILE_SYSTEM_VALUE: + if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + struct r600_bc_alu 
alu; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; + + alu.dst.sel = 0; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + break; + } + default: R600_ERR("unsupported file %d declaration\n", d->Declaration.File); return -EINVAL; @@ -481,9 +384,187 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) return ctx->num_interp_gpr; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) +static void tgsi_src(struct r600_shader_ctx *ctx, + const struct tgsi_full_src_register *tgsi_src, + struct r600_shader_src *r600_src) +{ + memset(r600_src, 0, sizeof(*r600_src)); + r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; + r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; + r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; + r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; + r600_src->neg = tgsi_src->Register.Negate; + r600_src->abs = tgsi_src->Register.Absolute; + + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { + int index; + if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { + + index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; + r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) + return; + } + index = tgsi_src->Register.Index; + r600_src->sel = V_SQ_ALU_SRC_LITERAL; + memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); + } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { + /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ + r600_src->swizzle[0] = 3; + r600_src->swizzle[1] = 3; + r600_src->swizzle[2] = 3; + r600_src->swizzle[3] = 3; + r600_src->sel = 0; + } 
else { + if (tgsi_src->Register.Indirect) + r600_src->rel = V_SQ_REL_RELATIVE; + r600_src->sel = tgsi_src->Register.Index; + r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + } +} + +static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) +{ + struct r600_bc_vtx vtx; + unsigned int ar_reg; + int r; + + if (offset) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.src[0].sel = ctx->ar_reg; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = offset; + + alu.dst.sel = dst_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + ar_reg = dst_reg; + } else { + ar_reg = ctx->ar_reg; + } + + memset(&vtx, 0, sizeof(vtx)); + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = ar_reg; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = dst_reg; + vtx.dst_sel_x = 0; /* SEL_X */ + vtx.dst_sel_y = 1; /* SEL_Y */ + vtx.dst_sel_z = 2; /* SEL_Z */ + vtx.dst_sel_w = 3; /* SEL_W */ + vtx.data_format = FMT_32_32_32_32_FLOAT; + vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ + vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ + + if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) + return r; + + return 0; +} + +static int tgsi_split_constant(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nconst, r; + + for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + nconst++; + } + tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); + } + for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { + continue; + } + + if (ctx->src[i].rel) { + int treg = r600_get_temp(ctx); + if ((r = tgsi_fetch_rel_const(ctx, 
ctx->src[i].sel - 512, treg))) + return r; + + ctx->src[i].sel = treg; + ctx->src[i].rel = 0; + j--; + } else if (j > 0) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = ctx->src[i].sel; + alu.src[0].chan = k; + alu.src[0].rel = ctx->src[i].rel; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->src[i].sel = treg; + ctx->src[i].rel =0; + j--; + } + } + return 0; +} + +/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ +static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nliteral, r; + + for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + nliteral++; + } + } + for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = ctx->src[i].sel; + alu.src[0].chan = k; + alu.src[0].value = ctx->src[i].value[k]; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->src[i].sel = treg; + j--; + } + } + return 0; +} + +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; + struct tgsi_full_property *property; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; unsigned output_done, noutput; @@ -506,7 +587,9 @@ int r600_shader_from_tgsi(const 
struct tgsi_token *tokens, struct r600_shader *s /* Values [0,127] correspond to GPR[0..127]. * Values [128,159] correspond to constant buffer bank 0 * Values [160,191] correspond to constant buffer bank 1 - * Values [256,511] correspond to cfile constants c[0..255]. + * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) + * Values [256,287] correspond to constant buffer bank 2 (EG) + * Values [288,319] correspond to constant buffer bank 3 (EG) * Other special values are shown in the list below. * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) @@ -540,15 +623,18 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + ctx.info.file_count[TGSI_FILE_OUTPUT]; - ctx.file_offset[TGSI_FILE_CONSTANT] = 128; + /* Outside the GPR range. This will be translated to one of the + * kcache banks later. 
*/ + ctx.file_offset[TGSI_FILE_CONSTANT] = 512; - ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; - ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + + ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; + ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; ctx.literals = NULL; - + shader->fs_write_all = FALSE; while (!tgsi_parse_end_of_tokens(&ctx.parse)) { tgsi_parse_token(&ctx.parse); switch (ctx.parse.FullToken.Token.Type) { @@ -577,7 +663,12 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.max_driver_temp_used = 0; /* reserve first tmp for everyone */ r600_get_temp(&ctx); + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + if ((r = tgsi_split_constant(&ctx))) + goto out_err; + if ((r = tgsi_split_literal_constant(&ctx))) + goto out_err; if (ctx.bc->chiprev == CHIPREV_EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else @@ -585,9 +676,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = ctx.inst_info->process(&ctx); if (r) goto out_err; - r = r600_bc_add_literal(ctx.bc, ctx.value); - if (r) - goto out_err; + break; + case TGSI_TOKEN_TYPE_PROPERTY: + property = &ctx.parse.FullToken.FullProperty; + if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { + if (property->u[0].Data == 1) + shader->fs_write_all = TRUE; + } break; default: R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); @@ -605,6 +700,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].swizzle_y = 1; output[i].swizzle_z = 2; output[i].swizzle_w = 3; + output[i].burst_count = 1; output[i].barrier = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = i - pos0; @@ -668,6 +764,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s 
output[i].swizzle_y = 1; output[i].swizzle_z = 2; output[i].swizzle_w = 3; + output[i].burst_count = 1; output[i].barrier = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = 0; @@ -684,6 +781,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[0].swizzle_y = 7; output[0].swizzle_z = 7; output[0].swizzle_w = 7; + output[0].burst_count = 1; output[0].barrier = 1; output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; output[0].array_base = 0; @@ -726,33 +824,22 @@ static int tgsi_end(struct r600_shader_ctx *ctx) return 0; } -static int tgsi_src(struct r600_shader_ctx *ctx, - const struct tgsi_full_src_register *tgsi_src, - struct r600_bc_alu_src *r600_src) +static void r600_bc_src(struct r600_bc_alu_src *bc_src, + const struct r600_shader_src *shader_src, + unsigned chan) { - int index; - memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); - r600_src->sel = tgsi_src->Register.Index; - if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { - r600_src->sel = 0; - index = tgsi_src->Register.Index; - ctx->value[0] = ctx->literals[index * 4 + 0]; - ctx->value[1] = ctx->literals[index * 4 + 1]; - ctx->value[2] = ctx->literals[index * 4 + 2]; - ctx->value[3] = ctx->literals[index * 4 + 3]; - } - if (tgsi_src->Register.Indirect) - r600_src->rel = V_SQ_REL_RELATIVE; - r600_src->neg = tgsi_src->Register.Negate; - r600_src->abs = tgsi_src->Register.Absolute; - r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; - return 0; + bc_src->sel = shader_src->sel; + bc_src->chan = shader_src->swizzle[chan]; + bc_src->neg = shader_src->neg; + bc_src->abs = shader_src->abs; + bc_src->rel = shader_src->rel; + bc_src->value = shader_src->value[bc_src->chan]; } -static int tgsi_dst(struct r600_shader_ctx *ctx, - const struct tgsi_full_dst_register *tgsi_dst, - unsigned swizzle, - struct r600_bc_alu_dst *r600_dst) +static void tgsi_dst(struct r600_shader_ctx *ctx, + const struct tgsi_full_dst_register 
*tgsi_dst, + unsigned swizzle, + struct r600_bc_alu_dst *r600_dst) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -765,146 +852,42 @@ static int tgsi_dst(struct r600_shader_ctx *ctx, if (inst->Instruction.Saturate) { r600_dst->clamp = 1; } - return 0; } -static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) +static int tgsi_last_instruction(unsigned writemask) { - switch (swizzle) { - case 0: - return tgsi_src->Register.SwizzleX; - case 1: - return tgsi_src->Register.SwizzleY; - case 2: - return tgsi_src->Register.SwizzleZ; - case 3: - return tgsi_src->Register.SwizzleW; - default: - return 0; - } -} + int i, lasti = 0; -static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, k, nconst, r; - - for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { - nconst++; - } - r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); - if (r) { - return r; - } - } - for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { - int treg = r600_get_temp(ctx); - for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[i].sel; - alu.src[0].chan = k; - alu.src[0].rel = r600_src[i].rel; - alu.dst.sel = treg; - alu.dst.chan = k; - alu.dst.write = 1; - if (k == 3) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - r600_src[i].sel = treg; - r600_src[i].rel =0; - j--; - } - } - return 0; -} - -/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ -static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) -{ - 
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, k, nliteral, r; - - for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { - nliteral++; - } - } - for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { - int treg = r600_get_temp(ctx); - for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[i].sel; - alu.src[0].chan = k; - alu.dst.sel = treg; - alu.dst.chan = k; - alu.dst.write = 1; - if (k == 3) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); - if (r) - return r; - r600_src[i].sel = treg; - j--; + for (i = 0; i < 4; i++) { + if (writemask & (1 << i)) { + lasti = i; } } - return 0; + return lasti; } static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; - int lasti = 0; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - for (i = 0; i < 4; i++) { - if (inst->Dst[0].Register.WriteMask & (1 << i)) { - lasti = i; - } - } - - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bc_alu)); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = 
tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } } else { - alu.src[0] = r600_src[1]; - alu.src[0].chan = tgsi_chan(&inst->Src[1], i); - - alu.src[1] = r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); } /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { @@ -942,25 +925,15 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) * r700 - normalize by dividing by 2PI * see fdo bug 27901 */ -static int tgsi_setup_trig(struct r600_shader_ctx *ctx, - struct r600_bc_alu_src r600_src[3]) +static int tgsi_setup_trig(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + static float half_inv_pi = 1.0 /(3.1415926535 * 2); + static float double_pi = 3.1415926535 * 2; + static float neg_pi = -3.1415926535; + int r; - uint32_t lit_vals[4]; struct r600_bc_alu alu; - memset(lit_vals, 0, 4*4); - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - - lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); - lit_vals[1] = fui(0.5f); - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -969,20 +942,17 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; - alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[2].chan = 1; + alu.src[1].value = *(uint32_t *)&half_inv_pi; + alu.src[2].sel = V_SQ_ALU_SRC_0_5; + alu.src[2].chan = 0; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, lit_vals); - if (r) - return r; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); @@ -998,14 +968,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, if (r) return r; - if (ctx->bc->chiprev == CHIPREV_R600) { - lit_vals[0] = fui(3.1415926535897f * 2.0f); - lit_vals[1] = fui(-3.1415926535897f); - } else { - lit_vals[0] = fui(1.0f); - lit_vals[1] = fui(-0.5f); - } - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1020,26 +982,32 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[2].chan = 1; + alu.src[2].chan = 0; + + if (ctx->bc->chiprev == CHIPREV_R600) { + alu.src[1].value = *(uint32_t *)&double_pi; + alu.src[2].value = *(uint32_t *)&neg_pi; + } else { + alu.src[1].sel = V_SQ_ALU_SRC_1; + alu.src[2].sel = V_SQ_ALU_SRC_0_5; + alu.src[2].neg = 1; + } + alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, lit_vals); - if (r) - return r; return 0; } static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; - int lasti = 0; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_setup_trig(ctx, r600_src); + r = tgsi_setup_trig(ctx); if (r) return r; @@ -1057,10 +1025,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) return r; /* replicate result */ - for (i = 0; i < 4; i++) { - if (inst->Dst[0].Register.WriteMask & (1 << i)) - lasti = i; - } for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -1069,9 +1033,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->temp_reg; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, 
&inst->Dst[0], i, &alu.dst); if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1084,7 +1046,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int r; @@ -1092,7 +1053,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) * X or Y components of the destination vector. */ if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { - r = tgsi_setup_trig(ctx, r600_src); + r = tgsi_setup_trig(ctx); if (r) return r; } @@ -1101,9 +1062,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1117,9 +1076,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1135,9 +1092,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = 0; @@ -1147,10 +1102,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* dst.w = 1.0; */ @@ -1159,9 +1110,7 @@ static int tgsi_scs(struct 
r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; @@ -1171,10 +1120,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return 0; @@ -1182,7 +1127,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) static int tgsi_kill(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int i, r; @@ -1198,10 +1142,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); } if (i == 3) { alu.last = 1; @@ -1210,9 +1151,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* kill must be last in ALU */ ctx->bc->force_add_cf = 1; @@ -1224,24 +1162,14 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - struct r600_bc_alu_src r600_src[3]; int r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* dst.x, <- 1.0 */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ 
-1250,12 +1178,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.y = max(src.x, 0.0) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - alu.src[0] = r600_src[0]; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1266,19 +1192,13 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - if (inst->Dst[0].Register.WriteMask & (1 << 2)) { int chan; @@ -1287,33 +1207,24 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - chan = alu.dst.chan; sel = alu.dst.sel; /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); + r600_bc_src(&alu.src[0], 
&ctx->src[0], 3); alu.src[1].sel = sel; alu.src[1].chan = chan; - alu.src[2] = r600_src[0]; - alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; @@ -1323,17 +1234,12 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1357,10 +1263,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - if (r) - return r; - alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); + r600_bc_src(&alu.src[i], &ctx->src[i], 0); alu.src[i].abs = 1; } alu.dst.sel = ctx->temp_reg; @@ -1369,9 +1272,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* replicate result */ return tgsi_helper_tempx_replicate(ctx); } @@ -1387,9 +1287,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.dst.chan = i; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == 3) alu.last = 1; @@ -1409,10 +1307,7 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = 
ctx->inst_info->r600_opcode; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - if (r) - return r; - alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); + r600_bc_src(&alu.src[i], &ctx->src[i], 0); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1420,42 +1315,29 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* replicate result */ return tgsi_helper_tempx_replicate(ctx); } static int tgsi_pow(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; /* LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); - if (r) - return r; /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); - r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + r600_bc_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1463,9 +1345,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); - if (r) - return r; /* POW(a,b) = EXP2(b * LOG2(a))*/ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -1476,9 +1355,6 @@ static int 
tgsi_pow(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); - if (r) - return r; return tgsi_helper_tempx_replicate(ctx); } @@ -1486,16 +1362,8 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - struct r600_bc_alu_src r600_src[3]; int i, r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1505,31 +1373,23 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - + r600_bc_src(&alu.src[0], &ctx->src[0], i); alu.src[1].sel = V_SQ_ALU_SRC_1; + r600_bc_src(&alu.src[2], &ctx->src[0], i); - alu.src[2] = r600_src[0]; - alu.src[2].chan = tgsi_chan(&inst->Src[0], i); if (i == 3) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* dst = (-tmp > 0 ? 
-1 : tmp) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; @@ -1555,9 +1415,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru struct r600_bc_alu alu; int i, r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -1565,9 +1422,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru alu.dst.chan = i; } else { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; } @@ -1584,61 +1439,50 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* do it in 2 step as op3 doesn't support writemask */ - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } - alu.dst.sel = 
ctx->temp_reg; + + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; - if (i == 3) { + if (i == lasti) { alu.last = 1; } r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } - alu.dst.sel = ctx->temp_reg; + + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; - alu.dst.write = 1; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_DP2: @@ -1670,19 +1514,21 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_tex(struct r600_shader_ctx *ctx) { + static float one_point_five = 1.5f; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_tex tex; struct r600_bc_alu alu; unsigned src_gpr; int r, i; int opcode; - boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; - uint32_t lit_vals[4]; + boolean src_not_temp = + inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && + inst->Src[0].Register.File != TGSI_FILE_INPUT; src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; @@ -1690,11 +1536,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* Add perspective divide */ memset(&alu, 0, 
sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + r600_bc_src(&alu.src[0], &ctx->src[0], 3); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 3; alu.last = 1; @@ -1708,10 +1551,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; @@ -1735,43 +1575,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { - int src_chan, src2_chan; + static const unsigned src0_swizzle[] = {2, 2, 0, 1}; + static const unsigned src1_swizzle[] = {1, 0, 2, 2}; /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); - switch (i) { - case 0: - src_chan = 2; - src2_chan = 1; - break; - case 1: - src_chan = 2; - src2_chan = 0; - break; - case 2: - src_chan = 0; - src2_chan = 2; - break; - case 3: - src_chan = 1; - src2_chan = 2; - break; - default: - assert(0); - src_chan = 0; - src2_chan = 0; - break; - } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); + r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1811,6 +1623,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 
alu.src[2].chan = 0; + alu.src[2].value = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -1831,6 +1644,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; + alu.src[2].value = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -1841,11 +1655,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (r) return r; - lit_vals[0] = fui(1.5f); - - r = r600_bc_add_literal(ctx->bc, lit_vals); - if (r) - return r; src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -1854,8 +1663,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = src_gpr; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1876,7 +1684,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.resource_id = tex.sampler_id; + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 
0 : 7; @@ -1902,6 +1710,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.coord_type_w = 1; } + if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { + tex.coord_type_z = 0; + tex.src_sel_z = 1; + } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) + tex.coord_type_z = 0; + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) tex.src_sel_w = 2; @@ -1916,29 +1730,48 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; int r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; + /* optimize if it's just an equal balance */ + if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); + r600_bc_src(&alu.src[0], &ctx->src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); + alu.omod = 3; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.chan = i; + if (i == lasti) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; + } + /* 1 - src0 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - alu.src[1] = r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.src[1].neg = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - if (i == 3) { + 
if (i == lasti) { alu.last = 1; } alu.dst.write = 1; @@ -1946,21 +1779,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* (1 - src0) * src2 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } alu.dst.write = 1; @@ -1968,88 +1800,66 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* src0 * src1 + (1 - src0) * src2 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - alu.src[1] = r600_src[1]; - alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; - alu.dst.sel = ctx->temp_reg; + + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_cmp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; - int use_temp = 0; int i, r; + int lasti = 
tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - - if (inst->Dst[0].Register.WriteMask != 0xf) - use_temp = 1; + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); - - alu.src[2] = r600_src[1]; - alu.src[2].chan = tgsi_chan(&inst->Src[1], i); - - if (use_temp) - alu.dst.sel = ctx->temp_reg; - else { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } + r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bc_src(&alu.src[2], &ctx->src[1], i); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; - if (i == 3) + if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - if (use_temp) - return tgsi_helper_copy(ctx, inst); return 0; } static int tgsi_xpd(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + static const unsigned int src0_swizzle[] = {2, 0, 1}; + static const unsigned int src1_swizzle[] = {1, 2, 0}; struct r600_bc_alu alu; uint32_t use_temp = 0; int i, r; @@ -2057,45 +1867,15 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask != 0xf) use_temp = 1; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - - alu.src[0] = 
r600_src[0]; - switch (i) { - case 0: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); - break; - case 1: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); - break; - case 2: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); - break; - case 3: + if (i < 3) { + r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); + } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; - } - - alu.src[1] = r600_src[1]; - switch (i) { - case 0: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); - break; - case 1: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); - break; - case 2: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); - break; - case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = i; } @@ -2109,44 +1889,18 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); - alu.src[0] = r600_src[0]; - switch (i) { - case 0: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); - break; - case 1: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); - break; - case 2: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); - break; - case 3: + if (i < 3) { + r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); + r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); + } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; - } - - alu.src[1] = r600_src[1]; - switch (i) { - case 0: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); - break; - case 1: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); - break; - case 2: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); - break; - case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = i; } @@ -2157,11 +1911,8 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (use_temp) alu.dst.sel = ctx->temp_reg; - else { - r = 
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } + else + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; @@ -2170,10 +1921,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } if (use_temp) return tgsi_helper_copy(ctx, inst); @@ -2183,7 +1930,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3] = { { 0 } }; struct r600_bc_alu alu; int r; @@ -2192,11 +1938,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2206,10 +1948,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -2221,10 +1959,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.y = tmp - floor(tmp); */ @@ -2232,11 +1966,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - alu.src[0] = r600_src[0]; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; // r 
= tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2250,19 +1980,13 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.z = RoughApprox2ToX(tmp);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2273,9 +1997,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.w = 1.0;*/ @@ -2293,9 +2014,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return tgsi_helper_copy(ctx, inst); } @@ -2311,11 +2029,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2325,10 +2039,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -2341,10 +2051,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.y = src.x / (2 ^ 
floor(log2(src.x))); */ @@ -2352,11 +2058,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -2367,10 +2069,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); @@ -2386,10 +2084,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2405,10 +2099,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2424,19 +2114,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2449,10 +2131,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.z = log2(src);*/ @@ -2460,11 +2138,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) 
memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2474,10 +2148,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.w = 1.0; */ @@ -2496,10 +2166,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return tgsi_helper_copy(ctx, inst); @@ -2510,6 +2176,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { @@ -2524,26 +2191,26 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; - alu.dst.chan = 0; - alu.dst.sel = ctx->temp_reg; + alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; - r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; + + /* TODO: Note that the MOVA can be avoided if we never use AR for + * indexing non-CB registers in the current ALU clause. Similarly, we + * need to load AR from ar_reg again if we started a new clause + * between ARL and AR usage. The easy way to do that is to remove + * the MOVA here, and load it for the first AR access after ar_reg + * has been modified in each clause. 
*/ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].sel = ctx->temp_reg; + alu.src[0].sel = ctx->ar_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; return 0; @@ -2554,29 +2221,51 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + alu.src[0].sel = ctx->ar_reg; + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; case TGSI_OPCODE_ARR: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; default: assert(0); return -1; } - - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); - + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + alu.src[0].sel = ctx->ar_reg; alu.last = 1; - r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + r = 
r600_bc_add_alu(ctx->bc, &alu); if (r) return r; ctx->bc->cf_last->r6xx_uses_waterfall = 1; @@ -2593,26 +2282,18 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); } - if (i == 0 || i == 2) { + if (i == 0 || i == 2) { alu.src[1].sel = V_SQ_ALU_SRC_1; } else { - r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[1], i); } if (i == 3) alu.last = 1; @@ -2625,7 +2306,6 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; @@ -2637,10 +2317,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.dst.write = 1; alu.dst.chan = 0; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; @@ -2654,9 +2331,25 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) static int pops(struct r600_shader_ctx *ctx, int pops) { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); - ctx->bc->cf_last->pop_count = pops; - ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; + int alu_pop = 3; + if (ctx->bc->cf_last) { + if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) + alu_pop = 0; + else if (ctx->bc->cf_last->inst == 
CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) + alu_pop = 1; + } + alu_pop += pops; + if (alu_pop == 1) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else if (alu_pop == 2) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else { + r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); + ctx->bc->cf_last->pop_count = pops; + ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; + } return 0; } @@ -3002,7 +2695,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -3075,7 +2768,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, - {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, @@ -3160,7 +2853,7 @@ static struct 
r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 35b0331525..8f96ce5085 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -45,8 +45,7 @@ struct r600_shader { struct r600_shader_io output[32]; enum radeon_family family; boolean uses_kill; + boolean fs_write_all; }; -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); - #endif diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 0573e63dc8..56ed35e8b3 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -74,6 +74,10 @@ #define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 0x3) << 30) #define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3) #define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF +#define V_SQ_CF_KCACHE_NOP 0x00000000 +#define V_SQ_CF_KCACHE_LOCK_1 0x00000001 +#define V_SQ_CF_KCACHE_LOCK_2 0x00000002 +#define V_SQ_CF_KCACHE_LOCK_LOOP_INDEX 0x00000003 #define P_SQ_CF_ALU_WORD1 #define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0) #define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3) @@ -187,6 +191,8 @@ #define V_SQ_ALU_SRC_M_1_INT 0x000000FB #define V_SQ_ALU_SRC_0_5 0x000000FC #define V_SQ_ALU_SRC_LITERAL 0x000000FD +#define V_SQ_ALU_SRC_PV 0x000000FE +#define 
V_SQ_ALU_SRC_PS 0x000000FF #define V_SQ_ALU_SRC_PARAM_BASE 0x000001C0 #define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9) #define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 553d786d65..d3adf0393c 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -36,8 +36,8 @@ #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> -#include <util/u_upload_mgr.h> #include <util/u_framebuffer.h> +#include "util/u_transfer.h" #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -95,230 +95,6 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) } } -/* FIXME optimize away spi update when it's not needed */ -static void r600_spi_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_shader *shader = rctx->ps_shader; - struct r600_pipe_state rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; - - rstate.nregs = 0; - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - if (rshader->input[i].centroid) - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - tmp |= S_028644_SEL_LINEAR(1); - - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rctx->flatshade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - r600_context_pipe_state_set(&rctx->ctx, &rstate); -} - -void r600_vertex_buffer_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_state *rstate; - struct 
r600_resource *rbuffer; - struct pipe_vertex_buffer *vertex_buffer; - unsigned i, offset; - - /* we don't update until we know vertex elements */ - if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) - return; - - /* delete previous translated vertex elements */ - if (rctx->tran.new_velems) { - r600_end_vertex_translate(rctx); - } - - if (rctx->vertex_elements->incompatible_layout) { - /* translate rebind new vertex elements so - * return once translated - */ - r600_begin_vertex_translate(rctx); - return; - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - rctx->nvs_resource = rctx->vertex_elements->count; - } else { - /* bind vertex buffer once */ - rctx->nvs_resource = rctx->nvertex_buffer; - } - - for (i = 0 ; i < rctx->nvs_resource; i++) { - rstate = &rctx->vs_resource[i]; - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - unsigned vbuffer_index; - vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->vbuffer_offset[i] + - vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - } else { - /* bind vertex buffer once */ - vertex_buffer = &rctx->vertex_buffer[i]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - } - - r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, - S_038008_STRIDE(vertex_buffer->stride), - 0xFFFFFFFF, NULL); - 
r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, - 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); - } -} - -static void r600_draw_common(struct r600_drawl *draw) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; - struct r600_resource *rbuffer; - unsigned prim; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - - switch (draw->index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw->index_size); - return; - } - if (r600_conv_pipe_prim(draw->mode, &prim)) - return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } - - r600_spi_update(rctx); - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, 
draw->max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw->count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw->index_buffer) { - rbuffer = (struct r600_resource*)draw->index_buffer; - rdraw.indices = rbuffer->bo; - rdraw.indices_bo_offset = draw->index_buffer_offset; - } - r600_context_draw(&rctx->ctx, &rdraw); -} - -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_drawl draw; - boolean translate = FALSE; - - memset(&draw, 0, sizeof(struct r600_drawl)); - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->index_bias; - - r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, - &rctx->index_buffer.index_size, - &draw.start, - info->count); - - draw.index_size = rctx->index_buffer.index_size; - pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - draw.index_buffer_offset = draw.start * draw.index_size; - draw.start = 0; - r600_upload_index_buffer(rctx, &draw); - } else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->start; 
- } - r600_draw_common(&draw); - - if (translate) - r600_end_vertex_translate(rctx); - - pipe_resource_reference(&draw.index_buffer, NULL); -} - static void r600_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) { @@ -341,9 +117,10 @@ static void r600_set_blend_color(struct pipe_context *ctx, static void *r600_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); struct r600_pipe_state *rstate; - u32 color_control, target_mask; + u32 color_control = 0, target_mask; if (blend == NULL) { return NULL; @@ -353,7 +130,10 @@ static void *r600_create_blend_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_BLEND; target_mask = 0; - color_control = S_028808_PER_MRT_BLEND(1); + + /* R600 does not support per-MRT blends */ + if (rctx->family > CHIP_R600) + color_control |= S_028808_PER_MRT_BLEND(1); if (state->logicop_enable) { color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); } else { @@ -376,8 +156,9 @@ static void *r600_create_blend_state(struct pipe_context *ctx, } } blend->cb_target_mask = target_mask; + /* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). 
*/ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFF, NULL); + color_control, 0xFFFFFFFD, NULL); for (int i = 0; i < 8; i++) { unsigned eqRGB = state->rt[i].rgb_func; @@ -403,10 +184,11 @@ static void *r600_create_blend_state(struct pipe_context *ctx, bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); } - r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); - if (i == 0) { + /* R600 does not support per-MRT blends */ + if (rctx->family > CHIP_R600) + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); + if (i == 0) r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); - } } return rstate; } @@ -424,10 +206,6 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_DSA; /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ - /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be - * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will - * be set if shader use texkill instruction - */ db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); stencil_ref_mask = 0; stencil_ref_mask_bf = 0; @@ -486,7 +264,10 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, + * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by + * r600_pipe_shader_ps().*/ + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); 
r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); @@ -582,11 +363,16 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, { struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); union util_color uc; + uint32_t coord_trunc = 0; if (rstate == NULL) { return NULL; } + if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) || + (state->min_img_filter == PIPE_TEX_FILTER_NEAREST)) + coord_trunc = 1; + rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -603,7 +389,9 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, + S_03C008_MC_COORD_TRUNCATE(coord_trunc) | + S_03C008_TYPE(1), 0xFFFFFFFF, NULL); if (uc.ui) { r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); @@ -626,6 +414,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; + unsigned height, depth; if (resource == NULL) return NULL; @@ -643,7 +432,7 @@ static struct pipe_sampler_view 
*r600_create_sampler_view(struct pipe_context *c swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(state->format, + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { @@ -653,22 +442,30 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c if (desc == NULL) { R600_ERR("unknow format %d\n", state->format); } - tmp = (struct r600_resource_texture*)texture; + tmp = (struct r600_resource_texture *)texture; + if (tmp->depth && !tmp->is_flushing_texture) { + r600_texture_depth_flush(ctx, texture, TRUE); + tmp = tmp->flushed_depth_texture; + } + + if (tmp->force_int_type) { + word4 &= C_038010_NUM_FORMAT_ALL; + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); + } rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ - if (tmp->depth) { - r600_texture_depth_flush(ctx, texture); - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->flushed_depth_texture->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - } - pitch = align(tmp->pitch_in_pixels[0], 8); - if (tmp->tiled) { - array_mode = tmp->array_mode[0]; - tile_type = tmp->tile_type; + pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8); + array_mode = tmp->array_mode[0]; + tile_type = tmp->tile_type; + + height = texture->height0; + depth = texture->depth0; + if (texture->target == PIPE_TEXTURE_1D_ARRAY) { + height = 1; + depth = texture->array_size; + } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { + depth = texture->array_size; } /* FIXME properly handle first level != 0 */ @@ -679,22 +476,22 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c S_038000_PITCH((pitch / 8) - 1) | S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - 
S_038004_TEX_HEIGHT(texture->height0 - 1) | - S_038004_TEX_DEPTH(texture->depth0 - 1) | + S_038004_TEX_HEIGHT(height - 1) | + S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | - S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | + word4 | + S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, S_038014_LAST_LEVEL(state->u.tex.last_level) | - S_038014_BASE_ARRAY(0) | - S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); + S_038014_BASE_ARRAY(state->u.tex.first_layer) | + S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); @@ -709,7 +506,8 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, for (int i = 0; i < count; i++) { if (resource[i]) { - r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); } } } @@ -724,9 +522,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { if (resource[i]) - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); else - r600_context_pipe_state_set_ps_resource(&rctx->ctx, 
NULL, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], @@ -736,7 +536,8 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, } for (i = count; i < NUM_TEX_UNITS; i++) { if (rctx->ps_samplers.views[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } @@ -918,33 +719,55 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta unsigned offset; const struct util_format_description *desc; struct r600_bo *bo[3]; + int i; surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + + if (rtex->depth && !rtex->is_flushing_texture) { + r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); + rtex = rtex->flushed_depth_texture; + } + rbuffer = &rtex->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; /* XXX quite sure for dx10+ hw don't need any offset hacks */ - offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, + offset = r600_texture_get_offset(rtex, level, state->cbufs[cb]->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); + desc = util_format_description(surf->base.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_0280A0_NUMBER_SRGB; - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = 
r600_translate_colorswap(rtex->resource.base.b.format); + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + format = r600_translate_colorformat(surf->base.format); + swap = r600_translate_colorswap(surf->base.format); + + /* disable when gallium grows int textures */ + if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) + ntype = 4; + color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | S_0280A0_BLEND_CLAMP(1) | S_0280A0_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - color_info |= S_0280A0_SOURCE_FORMAT(1); + + /* on R600 this can't be set if BLEND_CLAMP isn't set, + if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */ + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && + desc->channel[i].size < 12) + color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM); r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, @@ -988,17 +811,14 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode[level] = 2; - rtex->tile_type = 1; - rtex->depth = 1; + rbuffer = &rtex->resource; /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, level, state->zsbuf->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, @@ -1029,8 +849,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, 
util_copy_framebuffer_state(&rctx->framebuffer, state); - rctx->pframebuffer = &rctx->framebuffer; - /* build states */ for (int i = 0; i < state->nr_cbufs; i++) { r600_cb(rctx, rstate, state, i); @@ -1116,48 +934,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } } -static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - /* Note that the state tracker can unbind constant buffers by - * passing NULL here. - */ - if (buffer == NULL) { - return; - } - - switch (shader) { - case PIPE_SHADER_VERTEX: - rctx->vs_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028180_ALU_CONST_BUFFER_SIZE_VS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028980_ALU_CONST_CACHE_VS_0, - r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); - break; - case PIPE_SHADER_FRAGMENT: - rctx->ps_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028140_ALU_CONST_BUFFER_SIZE_PS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028940_ALU_CONST_CACHE_PS_0, - r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } -} - void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; @@ -1197,6 +973,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; rctx->context.set_viewport_state = r600_set_viewport_state; 
rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; } void r600_init_config(struct r600_pipe_context *rctx) @@ -1450,6 +1227,163 @@ void r600_init_config(struct r600_pipe_context *rctx) r600_context_pipe_state_set(&rctx->ctx, rstate); } +void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; + int pos_index = -1, face_index = -1; + + rstate->nregs = 0; + + for (i = 0; i < rshader->ninput; i++) { + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + pos_index = i; + if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + } + + db_shader_control = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(1); + } + if (rshader->uses_kill) + db_shader_control |= S_02880C_KILL_ENABLE(1); + + exports_ps = 0; + num_cout = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || + rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + exports_ps |= 1; + else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { + num_cout++; + } + } + exports_ps |= S_028854_EXPORT_COLORS(num_cout); + if (!exports_ps) { + /* always at least export 1 component per pixel */ + exports_ps = 2; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | + S_0286CC_PERSP_GRADIENT_ENA(1); + spi_input_z = 0; + if (pos_index != -1) { + spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + 
S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | + S_0286CC_BARYC_SAMPLE_CNTL(1)); + spi_input_z |= 1; + } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028840_SQ_PGM_START_PS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028850_SQ_PGM_RESOURCES_PS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028854_SQ_PGM_EXPORTS_PS, + exports_ps, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288CC_SQ_PGM_CF_OFFSET_PS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all), + S_028808_MULTIWRITE_ENABLE(1), + NULL); + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, + db_shader_control, + S_02880C_Z_EXPORT_ENABLE(1) | + S_02880C_STENCIL_REF_EXPORT_ENABLE(1) | + S_02880C_KILL_ENABLE(1), + NULL); + + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned spi_vs_out_id[10]; + unsigned i, tmp; + + /* clear previous register */ + rstate->nregs = 0; + + /* so far never got proper semantic id from tgsi */ + /* FIXME better to move this in config things so 
they get emited + * only one time per cs + */ + for (i = 0; i < 10; i++) { + spi_vs_out_id[i] = 0; + } + for (i = 0; i < 32; i++) { + tmp = i << ((i & 3) * 8); + spi_vs_out_id[i / 4] |= tmp; + } + for (i = 0; i < 10; i++) { + r600_pipe_state_add_reg(rstate, + R_028614_SPI_VS_OUT_ID_0 + i * 4, + spi_vs_out_id[i], 0xFFFFFFFF, NULL); + } + + r600_pipe_state_add_reg(rstate, + R_0286C4_SPI_VS_OUT_CONFIG, + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028868_SQ_PGM_RESOURCES_VS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288D0_SQ_PGM_CF_OFFSET_VS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028858_SQ_PGM_START_VS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void r600_fetch_shader(struct r600_vertex_element *ve) +{ + struct r600_pipe_state *rstate; + + rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, + r600_bo_offset(ve->fetch_shader) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} + void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) { struct pipe_depth_stencil_alpha_state dsa; @@ -1487,3 +1421,25 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028D0C_COPY_CENTROID(1), NULL); return rstate; } + +void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) +{ + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, 
rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, + S_038008_STRIDE(stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, + 0xC0000000, 0xFFFFFFFF, NULL); +} diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c647e77b37..43dad0c802 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -27,7 +27,9 @@ #include <util/u_memory.h> #include <util/u_format.h> #include <pipebuffer/pb_buffer.h> +#include "pipe/p_shader_tokens.h" #include "r600_pipe.h" +#include "r600d.h" /* common state between evergreen and r600 */ void r600_bind_blend_state(struct pipe_context *ctx, void *state) @@ -121,17 +123,11 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) rctx->vertex_elements = v; if (v) { + u_vbuf_mgr_bind_vertex_elements(rctx->vbuf_mgr, state, + v->vmgr_elements); + rctx->states[v->rstate.id] = &v->rstate; r600_context_pipe_state_set(&rctx->ctx, &v->rstate); - if (rctx->family >= CHIP_CEDAR) { - evergreen_vertex_buffer_update(rctx); - } else { - r600_vertex_buffer_update(rctx); - } - } - - if (v) { -// rctx->vs_rebuild = TRUE; } } @@ -147,6 +143,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state) rctx->vertex_elements = NULL; r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL); + u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements); FREE(state); } @@ -171,42 +168,28 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, const struct 
pipe_vertex_buffer *buffers) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_vertex_buffer *vbo; - unsigned max_index = (unsigned)-1; - - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - - for (int i = 0; i < count; i++) { - vbo = (struct pipe_vertex_buffer*)&buffers[i]; - - rctx->vertex_buffer[i].buffer = NULL; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) - rctx->any_user_vbs = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + int i; - if (vbo->max_index == ~0) { - if (!vbo->stride) - vbo->max_index = 1; - else - vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + /* Zero states. */ + for (i = 0; i < count; i++) { + if (!buffers[i].buffer) { + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } else { + r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } } - max_index = MIN2(vbo->max_index, max_index); } - rctx->nvertex_buffer = count; - rctx->vb_max_index = max_index; - if (rctx->family >= CHIP_CEDAR) { - evergreen_vertex_buffer_update(rctx); - } else { - r600_vertex_buffer_update(rctx); + for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) { + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } else { + r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } } -} - -#define FORMAT_REPLACE(what, withwhat) \ - case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break + u_vbuf_mgr_set_vertex_buffers(rctx->vbuf_mgr, count, buffers); +} void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, @@ -214,33 +197,15 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct 
r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - enum pipe_format *format; - int i; assert(count < 32); if (!v) return NULL; v->count = count; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - - for (i = 0; i < count; i++) { - v->hw_format[i] = v->elements[i].src_format; - format = &v->hw_format[i]; - - switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); - default:; - } - v->incompatible_layout = - v->incompatible_layout || - v->elements[i].src_format != v->hw_format[i]; - - v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4); - } + v->vmgr_elements = + u_vbuf_mgr_create_vertex_elements(rctx->vbuf_mgr, count, + elements, v->elements); if (r600_vertex_elements_build_fetch_shader(rctx, v)) { FREE(v); @@ -310,3 +275,274 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state) r600_pipe_shader_destroy(ctx, shader); free(shader); } + +/* FIXME optimize away spi update when it's not needed */ +void r600_spi_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_shader *shader = rctx->ps_shader; + struct r600_pipe_state rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp; + + rstate.nregs = 0; + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rctx->flatshade); + } + + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + + if (rctx->family < CHIP_CEDAR) { + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); + + if 
(rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + } + + r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &rstate); +} + +void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource_buffer *rbuffer = r600_buffer(buffer); + struct r600_pipe_state *rstate; + uint32_t offset; + + /* Note that the state tracker can unbind constant buffers by + * passing NULL here. + */ + if (buffer == NULL) { + return; + } + + r600_upload_const_buffer(rctx, &rbuffer, &offset); + offset += r600_bo_offset(rbuffer->r.bo); + + switch (shader) { + case PIPE_SHADER_VERTEX: + rctx->vs_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028180_ALU_CONST_BUFFER_SIZE_VS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028980_ALU_CONST_CACHE_VS_0, + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + + rstate = &rctx->vs_const_buffer_resource[index]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); + } + break; + case PIPE_SHADER_FRAGMENT: + rctx->ps_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028140_ALU_CONST_BUFFER_SIZE_PS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028940_ALU_CONST_CACHE_PS_0, + offset >> 8, 
0xFFFFFFFF, rbuffer->r.bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + + rstate = &rctx->ps_const_buffer_resource[index]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); + } + break; + default: + R600_ERR("unsupported %d\n", shader); + return; + } + + if (buffer != &rbuffer->r.b.b.b) + pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); +} + +static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + struct pipe_vertex_buffer *vertex_buffer; + unsigned i, count, offset; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + count = rctx->vertex_elements->count; + } else { + /* bind vertex buffer once */ + count = rctx->vbuf_mgr->nr_real_vertex_buffers; + } + + for (i = 0 ; i < count; i++) { + rstate = &rctx->fs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + unsigned vbuffer_index; + vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[vbuffer_index]; + rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[vbuffer_index]; + offset = rctx->vertex_elements->vbuffer_offset[i]; + } else { + /* bind vertex buffer once */ + vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[i]; + rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[i]; + offset = 0; + } + if (vertex_buffer == NULL || rbuffer == NULL) + continue; + offset += 
vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); + } + } +} + +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + struct r600_drawl draw = {}; + unsigned prim; + + r600_flush_depth_textures(rctx); + u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL); + r600_vertex_buffer_update(rctx); + + draw.info = *info; + draw.ctx = ctx; + if (info->indexed && rctx->index_buffer.buffer) { + draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); + + r600_translate_index_buffer(rctx, &draw.index_buffer, + &rctx->index_buffer.index_size, + &draw.info.start, + info->count); + + draw.index_size = rctx->index_buffer.index_size; + draw.index_buffer_offset = draw.info.start * draw.index_size; + draw.info.start = 0; + + if (u_vbuf_resource(draw.index_buffer)->user_ptr) { + r600_upload_index_buffer(rctx, &draw); + } + } else { + draw.info.index_bias = info->start; + } + + switch (draw.index_size) { + case 2: + vgt_draw_initiator = 0; + vgt_dma_index_type = 0; + break; + case 4: + vgt_draw_initiator = 0; + vgt_dma_index_type = 1; + break; + case 0: + vgt_draw_initiator = 2; + vgt_dma_index_type = 0; + break; + default: + R600_ERR("unsupported index size %d\n", draw.index_size); + return; + } + if (r600_conv_pipe_prim(draw.info.mode, &prim)) + return; + if (unlikely(rctx->ps_shader 
== NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + if (unlikely(rctx->vs_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + /* there should be enough input */ + if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); + return; + } + + r600_spi_update(rctx); + + mask = 0; + for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + mask |= (0xF << (i * 4)); + } + + vgt.id = R600_PIPE_STATE_VGT; + vgt.nregs = 0; + r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, &vgt); + + rdraw.vgt_num_indices = draw.info.count; + rdraw.vgt_num_instances = draw.info.instance_count; + rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.indices = NULL; + if (draw.index_buffer) { + rbuffer = (struct r600_resource*)draw.index_buffer; + rdraw.indices = rbuffer->bo; + rdraw.indices_bo_offset = draw.index_buffer_offset; + } + + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_draw(&rctx->ctx, &rdraw); + } else { + r600_context_draw(&rctx->ctx, &rdraw); + } + + if (rctx->framebuffer.zsbuf) + { + struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture; + ((struct r600_resource_texture 
*)tex)->dirty_db = TRUE; + } + + pipe_resource_reference(&draw.index_buffer, NULL); + + u_vbuf_mgr_draw_end(rctx->vbuf_mgr); +} diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index d994196e19..3d0360485a 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim) default: case PIPE_TEXTURE_1D: return V_038000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_038000_SQ_TEX_DIM_1D_ARRAY; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: return V_038000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_038000_SQ_TEX_DIM_2D_ARRAY; case PIPE_TEXTURE_3D: return V_038000_SQ_TEX_DIM_3D; case PIPE_TEXTURE_CUBE: @@ -285,10 +289,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_0280A0_SWAP_STD; + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_SWAP_ALT; + /* 16-bit buffers. 
*/ case PIPE_FORMAT_B5G6R5_UNORM: return V_0280A0_SWAP_STD_REV; @@ -305,6 +313,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_SWAP_STD; @@ -327,6 +336,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_X8R8G8B8_UNORM: return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: return V_0280A0_SWAP_STD; @@ -345,9 +355,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_SWAP_STD_REV; + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R16G16_UNORM: return V_0280A0_SWAP_STD; @@ -355,14 +367,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return FMT_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return FMT_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return FMT_32_32_32_32_FLOAT; - return 0; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_SWAP_STD; default: R600_ERR("unsupported colorswap format %d\n", format); return ~0; @@ -373,10 +384,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_COLOR_4_4; + /* 8-bit buffers. 
*/ case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_0280A0_COLOR_8; @@ -397,6 +412,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_16; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_COLOR_8_8; @@ -423,7 +439,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_COLOR_10_10_10_2; + return V_0280A0_COLOR_2_10_10_10; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: @@ -469,6 +485,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: return V_0280A0_COLOR_32_32_32_32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_COLOR_32_32_32_32; /* YUV buffers. 
*/ case PIPE_FORMAT_UYVY: @@ -479,9 +498,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } -static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) +static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { - return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; } static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) @@ -495,21 +514,13 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) return r600_translate_dbformat(format) != ~0; } -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) +static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format, + enum radeon_family family) { - return r600_translate_colorformat(format) != ~0; -} - -static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) -{ - uint32_t result = 0; - const struct util_format_description *desc; unsigned i; - - desc = util_format_description(format); - if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { - goto out_unknown; - } + const struct util_format_description *desc = util_format_description(format); + if (!desc) + return FALSE; /* Find the first non-VOID channel. 
*/ for (i = 0; i < 4; i++) { @@ -517,122 +528,23 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) break; } } - - switch (desc->channel[i].type) { - /* Half-floats, floats, doubles */ - case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[i].size) { - case 16: - switch (desc->nr_channels) { - case 1: - result = FMT_16_FLOAT; - break; - case 2: - result = FMT_16_16_FLOAT; - break; - case 3: - result = FMT_16_16_16_FLOAT; - break; - case 4: - result = FMT_16_16_16_16_FLOAT; - break; - } - break; - case 32: - switch (desc->nr_channels) { - case 1: - result = FMT_32_FLOAT; - break; - case 2: - result = FMT_32_32_FLOAT; - break; - case 3: - result = FMT_32_32_32_FLOAT; - break; - case 4: - result = FMT_32_32_32_32_FLOAT; - break; - } - break; - default: - goto out_unknown; - } - break; - /* Unsigned ints */ - case UTIL_FORMAT_TYPE_UNSIGNED: - /* Signed ints */ - case UTIL_FORMAT_TYPE_SIGNED: - switch (desc->channel[i].size) { - case 8: - switch (desc->nr_channels) { - case 1: - result = FMT_8; - break; - case 2: - result = FMT_8_8; - break; - case 3: - // result = FMT_8_8_8; /* fails piglit draw-vertices test */ - // break; - case 4: - result = FMT_8_8_8_8; - break; - } - break; - case 16: - switch (desc->nr_channels) { - case 1: - result = FMT_16; - break; - case 2: - result = FMT_16_16; - break; - case 3: - // result = FMT_16_16_16; /* fails piglit draw-vertices test */ - // break; - case 4: - result = FMT_16_16_16_16; - break; - } - break; - case 32: - switch (desc->nr_channels) { - case 1: - result = FMT_32; - break; - case 2: - result = FMT_32_32; - break; - case 3: - result = FMT_32_32_32; - break; - case 4: - result = FMT_32_32_32_32; - break; - } - break; - default: - goto out_unknown; - } - break; - default: - goto out_unknown; - } - - result = S_038008_DATA_FORMAT(result); - - if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - result |= S_038008_FORMAT_COMP_ALL(1); - } - if (desc->channel[i].normalized) { - result |= 
S_038008_NUM_FORMAT_ALL(0); - } else { - result |= S_038008_NUM_FORMAT_ALL(2); - } - return result; -out_unknown: - R600_ERR("unsupported vertex format %s\n", util_format_name(format)); - return ~0; + if (i == 4) + return FALSE; + + /* No fixed, no double. */ + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || + (desc->channel[i].size == 64 && + desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) + return FALSE; + + /* No scaled/norm formats with 32 bits per channel. */ + if (desc->channel[i].size == 32 && + (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)) + return FALSE; + + return TRUE; } #endif diff --git a/src/gallium/drivers/r600/r600_states_inc.h b/src/gallium/drivers/r600/r600_states_inc.h deleted file mode 100644 index 1c8075ebdb..0000000000 --- a/src/gallium/drivers/r600/r600_states_inc.h +++ /dev/null @@ -1,543 +0,0 @@ -/* This file is autogenerated from r600_states.h - do not edit directly */ -/* autogenerating script is gen_r600_states.py */ - -/* R600_CONFIG */ -#define R600_CONFIG__SQ_CONFIG 0 -#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1 -#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2 2 -#define R600_CONFIG__SQ_THREAD_RESOURCE_MGMT 3 -#define R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1 4 -#define R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2 5 -#define R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 6 -#define R600_CONFIG__TA_CNTL_AUX 7 -#define R600_CONFIG__VC_ENHANCE 8 -#define R600_CONFIG__DB_DEBUG 9 -#define R600_CONFIG__DB_WATERMARKS 10 -#define R600_CONFIG__SX_MISC 11 -#define R600_CONFIG__SPI_THREAD_GROUPING 12 -#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 13 -#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 14 -#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 15 -#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 16 -#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 17 -#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 18 -#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 19 -#define 
R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 20 -#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 21 -#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 22 -#define R600_CONFIG__VGT_HOS_CNTL 23 -#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 24 -#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 25 -#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 26 -#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 27 -#define R600_CONFIG__VGT_GROUP_FIRST_DECR 28 -#define R600_CONFIG__VGT_GROUP_DECR 29 -#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 30 -#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 31 -#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 32 -#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 33 -#define R600_CONFIG__VGT_GS_MODE 34 -#define R600_CONFIG__PA_SC_MODE_CNTL 35 -#define R600_CONFIG__VGT_STRMOUT_EN 36 -#define R600_CONFIG__VGT_REUSE_OFF 37 -#define R600_CONFIG__VGT_VTX_CNT_EN 38 -#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 39 -#define R600_CONFIG_SIZE 40 -#define R600_CONFIG_PM4 128 - -/* R600_CB_CNTL */ -#define R600_CB_CNTL__CB_CLEAR_RED 0 -#define R600_CB_CNTL__CB_CLEAR_GREEN 1 -#define R600_CB_CNTL__CB_CLEAR_BLUE 2 -#define R600_CB_CNTL__CB_CLEAR_ALPHA 3 -#define R600_CB_CNTL__CB_SHADER_MASK 4 -#define R600_CB_CNTL__CB_TARGET_MASK 5 -#define R600_CB_CNTL__CB_FOG_RED 6 -#define R600_CB_CNTL__CB_FOG_GREEN 7 -#define R600_CB_CNTL__CB_FOG_BLUE 8 -#define R600_CB_CNTL__CB_COLOR_CONTROL 9 -#define R600_CB_CNTL__PA_SC_AA_CONFIG 10 -#define R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX 11 -#define R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 12 -#define R600_CB_CNTL__CB_CLRCMP_CONTROL 13 -#define R600_CB_CNTL__CB_CLRCMP_SRC 14 -#define R600_CB_CNTL__CB_CLRCMP_DST 15 -#define R600_CB_CNTL__CB_CLRCMP_MSK 16 -#define R600_CB_CNTL__PA_SC_AA_MASK 17 -#define R600_CB_CNTL__CB_SHADER_CONTROL 18 -#define R600_CB_CNTL_SIZE 19 -#define R600_CB_CNTL_PM4 128 - -/* R600_RASTERIZER */ -#define R600_RASTERIZER__SPI_INTERP_CONTROL_0 0 -#define R600_RASTERIZER__PA_CL_CLIP_CNTL 1 -#define R600_RASTERIZER__PA_SU_SC_MODE_CNTL 2 -#define 
R600_RASTERIZER__PA_CL_VS_OUT_CNTL 3 -#define R600_RASTERIZER__PA_CL_NANINF_CNTL 4 -#define R600_RASTERIZER__PA_SU_POINT_SIZE 5 -#define R600_RASTERIZER__PA_SU_POINT_MINMAX 6 -#define R600_RASTERIZER__PA_SU_LINE_CNTL 7 -#define R600_RASTERIZER__PA_SC_LINE_STIPPLE 8 -#define R600_RASTERIZER__PA_SC_MPASS_PS_CNTL 9 -#define R600_RASTERIZER__PA_SC_LINE_CNTL 10 -#define R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ 11 -#define R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ 12 -#define R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ 13 -#define R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ 14 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL 15 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP 16 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE 17 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET 18 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE 19 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET 20 -#define R600_RASTERIZER_SIZE 21 -#define R600_RASTERIZER_PM4 128 - -/* R600_VIEWPORT */ -#define R600_VIEWPORT__PA_SC_VPORT_ZMIN_0 0 -#define R600_VIEWPORT__PA_SC_VPORT_ZMAX_0 1 -#define R600_VIEWPORT__PA_CL_VPORT_XSCALE_0 2 -#define R600_VIEWPORT__PA_CL_VPORT_YSCALE_0 3 -#define R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0 4 -#define R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0 5 -#define R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0 6 -#define R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0 7 -#define R600_VIEWPORT__PA_CL_VTE_CNTL 8 -#define R600_VIEWPORT_SIZE 9 -#define R600_VIEWPORT_PM4 128 - -/* R600_SCISSOR */ -#define R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL 0 -#define R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR 1 -#define R600_SCISSOR__PA_SC_WINDOW_OFFSET 2 -#define R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL 3 -#define R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR 4 -#define R600_SCISSOR__PA_SC_CLIPRECT_RULE 5 -#define R600_SCISSOR__PA_SC_CLIPRECT_0_TL 6 -#define R600_SCISSOR__PA_SC_CLIPRECT_0_BR 7 -#define R600_SCISSOR__PA_SC_CLIPRECT_1_TL 8 -#define R600_SCISSOR__PA_SC_CLIPRECT_1_BR 9 -#define 
R600_SCISSOR__PA_SC_CLIPRECT_2_TL 10 -#define R600_SCISSOR__PA_SC_CLIPRECT_2_BR 11 -#define R600_SCISSOR__PA_SC_CLIPRECT_3_TL 12 -#define R600_SCISSOR__PA_SC_CLIPRECT_3_BR 13 -#define R600_SCISSOR__PA_SC_EDGERULE 14 -#define R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL 15 -#define R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR 16 -#define R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL 17 -#define R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR 18 -#define R600_SCISSOR_SIZE 19 -#define R600_SCISSOR_PM4 128 - -/* R600_BLEND */ -#define R600_BLEND__CB_BLEND_RED 0 -#define R600_BLEND__CB_BLEND_GREEN 1 -#define R600_BLEND__CB_BLEND_BLUE 2 -#define R600_BLEND__CB_BLEND_ALPHA 3 -#define R600_BLEND__CB_BLEND0_CONTROL 4 -#define R600_BLEND__CB_BLEND1_CONTROL 5 -#define R600_BLEND__CB_BLEND2_CONTROL 6 -#define R600_BLEND__CB_BLEND3_CONTROL 7 -#define R600_BLEND__CB_BLEND4_CONTROL 8 -#define R600_BLEND__CB_BLEND5_CONTROL 9 -#define R600_BLEND__CB_BLEND6_CONTROL 10 -#define R600_BLEND__CB_BLEND7_CONTROL 11 -#define R600_BLEND__CB_BLEND_CONTROL 12 -#define R600_BLEND_SIZE 13 -#define R600_BLEND_PM4 128 - -/* R600_DSA */ -#define R600_DSA__DB_STENCIL_CLEAR 0 -#define R600_DSA__DB_DEPTH_CLEAR 1 -#define R600_DSA__SX_ALPHA_TEST_CONTROL 2 -#define R600_DSA__DB_STENCILREFMASK 3 -#define R600_DSA__DB_STENCILREFMASK_BF 4 -#define R600_DSA__SX_ALPHA_REF 5 -#define R600_DSA__SPI_FOG_FUNC_SCALE 6 -#define R600_DSA__SPI_FOG_FUNC_BIAS 7 -#define R600_DSA__SPI_FOG_CNTL 8 -#define R600_DSA__DB_DEPTH_CONTROL 9 -#define R600_DSA__DB_SHADER_CONTROL 10 -#define R600_DSA__DB_RENDER_CONTROL 11 -#define R600_DSA__DB_RENDER_OVERRIDE 12 -#define R600_DSA__DB_SRESULTS_COMPARE_STATE1 13 -#define R600_DSA__DB_PRELOAD_CONTROL 14 -#define R600_DSA__DB_ALPHA_TO_MASK 15 -#define R600_DSA_SIZE 16 -#define R600_DSA_PM4 128 - -/* R600_VS_SHADER */ -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_0 0 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_1 1 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_2 2 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_3 3 -#define 
R600_VS_SHADER__SQ_VTX_SEMANTIC_4 4 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_5 5 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_6 6 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_7 7 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_8 8 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_9 9 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_10 10 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_11 11 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_12 12 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_13 13 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_14 14 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_15 15 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_16 16 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_17 17 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_18 18 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_19 19 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_20 20 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_21 21 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_22 22 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_23 23 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_24 24 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_25 25 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_26 26 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_27 27 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_28 28 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_29 29 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_30 30 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_31 31 -#define R600_VS_SHADER__SPI_VS_OUT_ID_0 32 -#define R600_VS_SHADER__SPI_VS_OUT_ID_1 33 -#define R600_VS_SHADER__SPI_VS_OUT_ID_2 34 -#define R600_VS_SHADER__SPI_VS_OUT_ID_3 35 -#define R600_VS_SHADER__SPI_VS_OUT_ID_4 36 -#define R600_VS_SHADER__SPI_VS_OUT_ID_5 37 -#define R600_VS_SHADER__SPI_VS_OUT_ID_6 38 -#define R600_VS_SHADER__SPI_VS_OUT_ID_7 39 -#define R600_VS_SHADER__SPI_VS_OUT_ID_8 40 -#define R600_VS_SHADER__SPI_VS_OUT_ID_9 41 -#define R600_VS_SHADER__SPI_VS_OUT_CONFIG 42 -#define R600_VS_SHADER__SQ_PGM_START_VS 43 -#define R600_VS_SHADER__SQ_PGM_RESOURCES_VS 44 -#define R600_VS_SHADER__SQ_PGM_START_FS 45 -#define R600_VS_SHADER__SQ_PGM_RESOURCES_FS 46 -#define 
R600_VS_SHADER__SQ_PGM_CF_OFFSET_VS 47 -#define R600_VS_SHADER__SQ_PGM_CF_OFFSET_FS 48 -#define R600_VS_SHADER_SIZE 49 -#define R600_VS_SHADER_PM4 128 - -/* R600_PS_SHADER */ -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 0 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_1 1 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_2 2 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_3 3 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_4 4 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_5 5 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_6 6 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_7 7 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_8 8 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_9 9 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_10 10 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_11 11 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_12 12 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_13 13 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_14 14 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_15 15 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_16 16 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_17 17 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_18 18 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_19 19 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_20 20 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_21 21 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_22 22 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_23 23 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_24 24 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_25 25 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_26 26 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_27 27 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_28 28 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_29 29 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_30 30 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_31 31 -#define R600_PS_SHADER__SPI_PS_IN_CONTROL_0 32 -#define R600_PS_SHADER__SPI_PS_IN_CONTROL_1 33 -#define R600_PS_SHADER__SPI_INPUT_Z 34 -#define R600_PS_SHADER__SQ_PGM_START_PS 35 -#define R600_PS_SHADER__SQ_PGM_RESOURCES_PS 36 -#define R600_PS_SHADER__SQ_PGM_EXPORTS_PS 37 
-#define R600_PS_SHADER__SQ_PGM_CF_OFFSET_PS 38 -#define R600_PS_SHADER_SIZE 39 -#define R600_PS_SHADER_PM4 128 - -/* R600_VS_CBUF */ -#define R600_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0 0 -#define R600_VS_CBUF__ALU_CONST_CACHE_VS_0 1 -#define R600_VS_CBUF_SIZE 2 -#define R600_VS_CBUF_PM4 128 - -/* R600_PS_CBUF */ -#define R600_PS_CBUF__ALU_CONST_BUFFER_SIZE_PS_0 0 -#define R600_PS_CBUF__ALU_CONST_CACHE_PS_0 1 -#define R600_PS_CBUF_SIZE 2 -#define R600_PS_CBUF_PM4 128 - -/* R600_PS_CONSTANT */ -#define R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0 0 -#define R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0 1 -#define R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0 2 -#define R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0 3 -#define R600_PS_CONSTANT_SIZE 4 -#define R600_PS_CONSTANT_PM4 128 - -/* R600_VS_CONSTANT */ -#define R600_VS_CONSTANT__SQ_ALU_CONSTANT0_256 0 -#define R600_VS_CONSTANT__SQ_ALU_CONSTANT1_256 1 -#define R600_VS_CONSTANT__SQ_ALU_CONSTANT2_256 2 -#define R600_VS_CONSTANT__SQ_ALU_CONSTANT3_256 3 -#define R600_VS_CONSTANT_SIZE 4 -#define R600_VS_CONSTANT_PM4 128 - -/* R600_UCP */ -#define R600_UCP__PA_CL_UCP0_X 0 -#define R600_UCP__PA_CL_UCP0_Y 1 -#define R600_UCP__PA_CL_UCP0_Z 2 -#define R600_UCP__PA_CL_UCP0_W 3 -#define R600_UCP__PA_CL_UCP1_X 4 -#define R600_UCP__PA_CL_UCP1_Y 5 -#define R600_UCP__PA_CL_UCP1_Z 6 -#define R600_UCP__PA_CL_UCP1_W 7 -#define R600_UCP__PA_CL_UCP2_X 8 -#define R600_UCP__PA_CL_UCP2_Y 9 -#define R600_UCP__PA_CL_UCP2_Z 10 -#define R600_UCP__PA_CL_UCP2_W 11 -#define R600_UCP__PA_CL_UCP3_X 12 -#define R600_UCP__PA_CL_UCP3_Y 13 -#define R600_UCP__PA_CL_UCP3_Z 14 -#define R600_UCP__PA_CL_UCP3_W 15 -#define R600_UCP__PA_CL_UCP4_X 16 -#define R600_UCP__PA_CL_UCP4_Y 17 -#define R600_UCP__PA_CL_UCP4_Z 18 -#define R600_UCP__PA_CL_UCP4_W 19 -#define R600_UCP__PA_CL_UCP5_X 20 -#define R600_UCP__PA_CL_UCP5_Y 21 -#define R600_UCP__PA_CL_UCP5_Z 22 -#define R600_UCP__PA_CL_UCP5_W 23 -#define R600_UCP_SIZE 24 -#define R600_UCP_PM4 128 - -/* R600_PS_RESOURCE */ -#define 
R600_PS_RESOURCE__RESOURCE0_WORD0 0 -#define R600_PS_RESOURCE__RESOURCE0_WORD1 1 -#define R600_PS_RESOURCE__RESOURCE0_WORD2 2 -#define R600_PS_RESOURCE__RESOURCE0_WORD3 3 -#define R600_PS_RESOURCE__RESOURCE0_WORD4 4 -#define R600_PS_RESOURCE__RESOURCE0_WORD5 5 -#define R600_PS_RESOURCE__RESOURCE0_WORD6 6 -#define R600_PS_RESOURCE_SIZE 7 -#define R600_PS_RESOURCE_PM4 128 - -/* R600_VS_RESOURCE */ -#define R600_VS_RESOURCE__RESOURCE160_WORD0 0 -#define R600_VS_RESOURCE__RESOURCE160_WORD1 1 -#define R600_VS_RESOURCE__RESOURCE160_WORD2 2 -#define R600_VS_RESOURCE__RESOURCE160_WORD3 3 -#define R600_VS_RESOURCE__RESOURCE160_WORD4 4 -#define R600_VS_RESOURCE__RESOURCE160_WORD5 5 -#define R600_VS_RESOURCE__RESOURCE160_WORD6 6 -#define R600_VS_RESOURCE_SIZE 7 -#define R600_VS_RESOURCE_PM4 128 - -/* R600_FS_RESOURCE */ -#define R600_FS_RESOURCE__RESOURCE320_WORD0 0 -#define R600_FS_RESOURCE__RESOURCE320_WORD1 1 -#define R600_FS_RESOURCE__RESOURCE320_WORD2 2 -#define R600_FS_RESOURCE__RESOURCE320_WORD3 3 -#define R600_FS_RESOURCE__RESOURCE320_WORD4 4 -#define R600_FS_RESOURCE__RESOURCE320_WORD5 5 -#define R600_FS_RESOURCE__RESOURCE320_WORD6 6 -#define R600_FS_RESOURCE_SIZE 7 -#define R600_FS_RESOURCE_PM4 128 - -/* R600_GS_RESOURCE */ -#define R600_GS_RESOURCE__RESOURCE336_WORD0 0 -#define R600_GS_RESOURCE__RESOURCE336_WORD1 1 -#define R600_GS_RESOURCE__RESOURCE336_WORD2 2 -#define R600_GS_RESOURCE__RESOURCE336_WORD3 3 -#define R600_GS_RESOURCE__RESOURCE336_WORD4 4 -#define R600_GS_RESOURCE__RESOURCE336_WORD5 5 -#define R600_GS_RESOURCE__RESOURCE336_WORD6 6 -#define R600_GS_RESOURCE_SIZE 7 -#define R600_GS_RESOURCE_PM4 128 - -/* R600_PS_SAMPLER */ -#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0 0 -#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0 1 -#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0 2 -#define R600_PS_SAMPLER_SIZE 3 -#define R600_PS_SAMPLER_PM4 128 - -/* R600_VS_SAMPLER */ -#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD0_18 0 -#define 
R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD1_18 1 -#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD2_18 2 -#define R600_VS_SAMPLER_SIZE 3 -#define R600_VS_SAMPLER_PM4 128 - -/* R600_GS_SAMPLER */ -#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD0_36 0 -#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD1_36 1 -#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD2_36 2 -#define R600_GS_SAMPLER_SIZE 3 -#define R600_GS_SAMPLER_PM4 128 - -/* R600_PS_SAMPLER_BORDER */ -#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 0 -#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 1 -#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 2 -#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 3 -#define R600_PS_SAMPLER_BORDER_SIZE 4 -#define R600_PS_SAMPLER_BORDER_PM4 128 - -/* R600_VS_SAMPLER_BORDER */ -#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 0 -#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 1 -#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 2 -#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 3 -#define R600_VS_SAMPLER_BORDER_SIZE 4 -#define R600_VS_SAMPLER_BORDER_PM4 128 - -/* R600_GS_SAMPLER_BORDER */ -#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 0 -#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 1 -#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 2 -#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 3 -#define R600_GS_SAMPLER_BORDER_SIZE 4 -#define R600_GS_SAMPLER_BORDER_PM4 128 - -/* R600_CB0 */ -#define R600_CB0__CB_COLOR0_BASE 0 -#define R600_CB0__CB_COLOR0_INFO 1 -#define R600_CB0__CB_COLOR0_SIZE 2 -#define R600_CB0__CB_COLOR0_VIEW 3 -#define R600_CB0__CB_COLOR0_FRAG 4 -#define R600_CB0__CB_COLOR0_TILE 5 -#define R600_CB0__CB_COLOR0_MASK 6 -#define R600_CB0_SIZE 7 -#define R600_CB0_PM4 128 - -/* R600_CB1 */ -#define R600_CB1__CB_COLOR1_BASE 0 -#define R600_CB1__CB_COLOR1_INFO 1 -#define R600_CB1__CB_COLOR1_SIZE 2 -#define R600_CB1__CB_COLOR1_VIEW 3 -#define 
R600_CB1__CB_COLOR1_FRAG 4 -#define R600_CB1__CB_COLOR1_TILE 5 -#define R600_CB1__CB_COLOR1_MASK 6 -#define R600_CB1_SIZE 7 -#define R600_CB1_PM4 128 - -/* R600_CB2 */ -#define R600_CB2__CB_COLOR2_BASE 0 -#define R600_CB2__CB_COLOR2_INFO 1 -#define R600_CB2__CB_COLOR2_SIZE 2 -#define R600_CB2__CB_COLOR2_VIEW 3 -#define R600_CB2__CB_COLOR2_FRAG 4 -#define R600_CB2__CB_COLOR2_TILE 5 -#define R600_CB2__CB_COLOR2_MASK 6 -#define R600_CB2_SIZE 7 -#define R600_CB2_PM4 128 - -/* R600_CB3 */ -#define R600_CB3__CB_COLOR3_BASE 0 -#define R600_CB3__CB_COLOR3_INFO 1 -#define R600_CB3__CB_COLOR3_SIZE 2 -#define R600_CB3__CB_COLOR3_VIEW 3 -#define R600_CB3__CB_COLOR3_FRAG 4 -#define R600_CB3__CB_COLOR3_TILE 5 -#define R600_CB3__CB_COLOR3_MASK 6 -#define R600_CB3_SIZE 7 -#define R600_CB3_PM4 128 - -/* R600_CB4 */ -#define R600_CB4__CB_COLOR4_BASE 0 -#define R600_CB4__CB_COLOR4_INFO 1 -#define R600_CB4__CB_COLOR4_SIZE 2 -#define R600_CB4__CB_COLOR4_VIEW 3 -#define R600_CB4__CB_COLOR4_FRAG 4 -#define R600_CB4__CB_COLOR4_TILE 5 -#define R600_CB4__CB_COLOR4_MASK 6 -#define R600_CB4_SIZE 7 -#define R600_CB4_PM4 128 - -/* R600_CB5 */ -#define R600_CB5__CB_COLOR5_BASE 0 -#define R600_CB5__CB_COLOR5_INFO 1 -#define R600_CB5__CB_COLOR5_SIZE 2 -#define R600_CB5__CB_COLOR5_VIEW 3 -#define R600_CB5__CB_COLOR5_FRAG 4 -#define R600_CB5__CB_COLOR5_TILE 5 -#define R600_CB5__CB_COLOR5_MASK 6 -#define R600_CB5_SIZE 7 -#define R600_CB5_PM4 128 - -/* R600_CB6 */ -#define R600_CB6__CB_COLOR6_BASE 0 -#define R600_CB6__CB_COLOR6_INFO 1 -#define R600_CB6__CB_COLOR6_SIZE 2 -#define R600_CB6__CB_COLOR6_VIEW 3 -#define R600_CB6__CB_COLOR6_FRAG 4 -#define R600_CB6__CB_COLOR6_TILE 5 -#define R600_CB6__CB_COLOR6_MASK 6 -#define R600_CB6_SIZE 7 -#define R600_CB6_PM4 128 - -/* R600_CB7 */ -#define R600_CB7__CB_COLOR7_BASE 0 -#define R600_CB7__CB_COLOR7_INFO 1 -#define R600_CB7__CB_COLOR7_SIZE 2 -#define R600_CB7__CB_COLOR7_VIEW 3 -#define R600_CB7__CB_COLOR7_FRAG 4 -#define R600_CB7__CB_COLOR7_TILE 5 -#define 
R600_CB7__CB_COLOR7_MASK 6 -#define R600_CB7_SIZE 7 -#define R600_CB7_PM4 128 - -/* R600_DB */ -#define R600_DB__DB_DEPTH_BASE 0 -#define R600_DB__DB_DEPTH_SIZE 1 -#define R600_DB__DB_DEPTH_VIEW 2 -#define R600_DB__DB_DEPTH_INFO 3 -#define R600_DB__DB_HTILE_SURFACE 4 -#define R600_DB__DB_PREFETCH_LIMIT 5 -#define R600_DB_SIZE 6 -#define R600_DB_PM4 128 - -/* R600_VGT */ -#define R600_VGT__VGT_PRIMITIVE_TYPE 0 -#define R600_VGT__VGT_MAX_VTX_INDX 1 -#define R600_VGT__VGT_MIN_VTX_INDX 2 -#define R600_VGT__VGT_INDX_OFFSET 3 -#define R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX 4 -#define R600_VGT__VGT_DMA_INDEX_TYPE 5 -#define R600_VGT__VGT_PRIMITIVEID_EN 6 -#define R600_VGT__VGT_DMA_NUM_INSTANCES 7 -#define R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN 8 -#define R600_VGT__VGT_INSTANCE_STEP_RATE_0 9 -#define R600_VGT__VGT_INSTANCE_STEP_RATE_1 10 -#define R600_VGT_SIZE 11 -#define R600_VGT_PM4 128 - -/* R600_DRAW */ -#define R600_DRAW__VGT_NUM_INDICES 0 -#define R600_DRAW__VGT_DMA_BASE_HI 1 -#define R600_DRAW__VGT_DMA_BASE 2 -#define R600_DRAW__VGT_DRAW_INITIATOR 3 -#define R600_DRAW_SIZE 4 -#define R600_DRAW_PM4 128 - -/* R600_VGT_EVENT */ -#define R600_VGT_EVENT__VGT_EVENT_INITIATOR 0 -#define R600_VGT_EVENT_SIZE 1 -#define R600_VGT_EVENT_PM4 128 - -/* R600_CB_FLUSH */ -#define R600_CB_FLUSH_SIZE 0 -#define R600_CB_FLUSH_PM4 128 - -/* R600_DB_FLUSH */ -#define R600_DB_FLUSH_SIZE 0 -#define R600_DB_FLUSH_PM4 128 - diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index d4d9b07c0e..dc351bfb62 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -27,6 +27,7 @@ #include <errno.h> #include <pipe/p_screen.h> #include <util/u_format.h> +#include <util/u_format_s3tc.h> #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> @@ -38,8 +39,6 @@ #include "r600d.h" #include "r600_formats.h" -extern struct u_resource_vtbl r600_texture_vtbl; - /* Copy from a full GPU texture to a 
transfer's staging one. */ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) { @@ -69,7 +68,7 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 rtransfer->staging_texture, 0, &sbox); - ctx->flush(ctx, 0, NULL); + ctx->flush(ctx, NULL); } unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, @@ -77,17 +76,15 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, { unsigned offset = rtex->offset[level]; - switch (rtex->resource.base.b.target) { + switch (rtex->resource.b.b.b.target) { case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: - return offset + layer * rtex->layer_size[level]; default: - assert(layer == 0); - return offset; + return offset + layer * rtex->layer_size[level]; } } -static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, +static unsigned r600_get_block_alignment(struct pipe_screen *screen, enum pipe_format format, unsigned array_mode) { @@ -105,6 +102,9 @@ static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, (((rscreen->tiling_info->group_bytes / 8 / pixsize)) * rscreen->tiling_info->num_banks)) * 8; break; + case V_038000_ARRAY_LINEAR_ALIGNED: + p_align = MAX2(64, rscreen->tiling_info->group_bytes / pixsize); + break; case V_038000_ARRAY_LINEAR_GENERAL: default: p_align = rscreen->tiling_info->group_bytes / pixsize; @@ -124,8 +124,10 @@ static unsigned r600_get_height_alignment(struct pipe_screen *screen, h_align = rscreen->tiling_info->num_channels * 8; break; case V_038000_ARRAY_1D_TILED_THIN1: + case V_038000_ARRAY_LINEAR_ALIGNED: h_align = 8; break; + case V_038000_ARRAY_LINEAR_GENERAL: default: h_align = 1; break; @@ -139,7 +141,7 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen, { struct r600_screen* rscreen = (struct r600_screen *)screen; unsigned pixsize = util_format_get_blocksize(format); - int p_align = r600_get_pixel_alignment(screen, format, array_mode); + int p_align = 
r600_get_block_alignment(screen, format, array_mode); int h_align = r600_get_height_alignment(screen, array_mode); int b_align; @@ -149,6 +151,8 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen, p_align * pixsize * h_align); break; case V_038000_ARRAY_1D_TILED_THIN1: + case V_038000_ARRAY_LINEAR_ALIGNED: + case V_038000_ARRAY_LINEAR_GENERAL: default: b_align = rscreen->tiling_info->group_bytes; break; @@ -165,55 +169,46 @@ static unsigned mip_minify(unsigned size, unsigned level) return val; } -static unsigned r600_texture_get_stride(struct pipe_screen *screen, - struct r600_resource_texture *rtex, - unsigned level) +static unsigned r600_texture_get_nblocksx(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) { - struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned width, stride, tile_width; + struct pipe_resource *ptex = &rtex->resource.b.b.b; + unsigned nblocksx, block_align, width; + unsigned blocksize = util_format_get_blocksize(ptex->format); if (rtex->pitch_override) - return rtex->pitch_override; + return rtex->pitch_override / blocksize; width = mip_minify(ptex->width0, level); - if (util_format_is_plain(ptex->format)) { - tile_width = r600_get_pixel_alignment(screen, ptex->format, - rtex->array_mode[level]); - width = align(width, tile_width); - } - stride = util_format_get_stride(ptex->format, width); + nblocksx = util_format_get_nblocksx(ptex->format, width); - return stride; + block_align = r600_get_block_alignment(screen, ptex->format, + rtex->array_mode[level]); + nblocksx = align(nblocksx, block_align); + return nblocksx; } static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; unsigned height, tile_height; height = mip_minify(ptex->height0, level); - if (util_format_is_plain(ptex->format)) { - tile_height = 
r600_get_height_alignment(screen, - rtex->array_mode[level]); - height = align(height, tile_height); - } - return util_format_get_nblocksy(ptex->format, height); -} - -/* Get a width in pixels from a stride in bytes. */ -static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes) -{ - return (pitch_in_bytes / util_format_get_blocksize(format)) * - util_format_get_blockwidth(format); + height = util_format_get_nblocksy(ptex->format, height); + tile_height = r600_get_height_alignment(screen, + rtex->array_mode[level]); + height = align(height, tile_height); + return height; } static void r600_texture_set_array_mode(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level, unsigned array_mode) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; switch (array_mode) { case V_0280A0_ARRAY_LINEAR_GENERAL: @@ -227,11 +222,11 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen, unsigned w, h, tile_height, tile_width; tile_height = r600_get_height_alignment(screen, array_mode); - tile_width = r600_get_pixel_alignment(screen, ptex->format, array_mode); + tile_width = r600_get_block_alignment(screen, ptex->format, array_mode); w = mip_minify(ptex->width0, level); h = mip_minify(ptex->height0, level); - if (w < tile_width || h < tile_height) + if (w <= tile_width || h <= tile_height) rtex->array_mode[level] = V_0280A0_ARRAY_1D_TILED_THIN1; else rtex->array_mode[level] = array_mode; @@ -244,40 +239,119 @@ static void r600_setup_miptree(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned array_mode) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); - unsigned pitch, size, layer_size, i, offset; - unsigned nblocksy; + unsigned size, layer_size, i, offset; + unsigned 
nblocksx, nblocksy; for (i = 0, offset = 0; i <= ptex->last_level; i++) { + unsigned blocksize = util_format_get_blocksize(ptex->format); + r600_texture_set_array_mode(screen, rtex, i, array_mode); - pitch = r600_texture_get_stride(screen, rtex, i); + nblocksx = r600_texture_get_nblocksx(screen, rtex, i); nblocksy = r600_texture_get_nblocksy(screen, rtex, i); - layer_size = pitch * nblocksy; - + layer_size = nblocksx * nblocksy * blocksize; if (ptex->target == PIPE_TEXTURE_CUBE) { if (chipc >= R700) size = layer_size * 8; else size = layer_size * 6; } - else + else if (ptex->target == PIPE_TEXTURE_3D) size = layer_size * u_minify(ptex->depth0, i); + else + size = layer_size * ptex->array_size; + /* align base image and start of miptree */ if ((i == 0) || (i == 1)) offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode)); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch_in_bytes[i] = pitch; - rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch); + rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */ + rtex->pitch_in_bytes[i] = nblocksx * blocksize; + offset += size; } rtex->size = offset; } +/* Figure out whether u_blitter will fallback to a transfer operation. + * If so, don't use a staging resource. 
+ */ +static boolean permit_hardware_blit(struct pipe_screen *screen, + const struct pipe_resource *res) +{ + unsigned bind; + + if (util_format_is_depth_or_stencil(res->format)) + bind = PIPE_BIND_DEPTH_STENCIL; + else + bind = PIPE_BIND_RENDER_TARGET; + + /* hackaround for S3TC */ + if (util_format_is_compressed(res->format)) + return TRUE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + bind)) + return FALSE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + PIPE_BIND_SAMPLER_VIEW)) + return FALSE; + + return TRUE; +} + +static boolean r600_texture_get_handle(struct pipe_screen* screen, + struct pipe_resource *ptex, + struct winsys_handle *whandle) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + struct radeon *radeon = (struct radeon *)screen->winsys; + + return r600_bo_get_winsys_handle(radeon, resource->bo, + rtex->pitch_in_bytes[0], whandle); +} + +static void r600_texture_destroy(struct pipe_screen *screen, + struct pipe_resource *ptex) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + struct radeon *radeon = (struct radeon *)screen->winsys; + + if (rtex->flushed_depth_texture) + pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + + if (resource->bo) { + r600_bo_reference(radeon, &resource->bo, NULL); + } + FREE(rtex); +} + +static const struct u_resource_vtbl r600_texture_vtbl = +{ + r600_texture_get_handle, /* get_handle */ + r600_texture_destroy, /* resource_destroy */ + r600_texture_get_transfer, /* get_transfer */ + r600_texture_transfer_destroy, /* transfer_destroy */ + r600_texture_transfer_map, /* transfer_map */ + u_default_transfer_flush_region,/* transfer_flush_region */ + r600_texture_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write 
/* transfer_inline_write */ +}; + static struct r600_resource_texture * r600_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, @@ -295,21 +369,22 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; resource = &rtex->resource; - resource->base.b = *base; - resource->base.vtbl = &r600_texture_vtbl; - pipe_reference_init(&resource->base.b.reference, 1); - resource->base.b.screen = screen; + resource->b.b.b = *base; + resource->b.b.vtbl = &r600_texture_vtbl; + pipe_reference_init(&resource->b.b.b.reference, 1); + resource->b.b.b.screen = screen; resource->bo = bo; rtex->pitch_override = pitch_in_bytes_override; + /* only mark depth textures the HW can hit as depth textures */ + if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base)) + rtex->depth = 1; - if (array_mode) - rtex->tiled = 1; r600_setup_miptree(screen, rtex, array_mode); resource->size = rtex->size; if (!resource->bo) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; int base_align = r600_get_base_alignment(screen, ptex->format, array_mode); resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage); @@ -329,56 +404,38 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, /* Would like some magic "get_bool_option_once" routine. 
*/ - if (force_tiling == -1) - force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE); + if (force_tiling == -1) { +#if 0 + /* reenable when 2D tiling is fixed better */ + struct r600_screen *rscreen = (struct r600_screen *)screen; + if (r600_get_minor_version(rscreen->radeon) >= 9) + force_tiling = debug_get_bool_option("R600_TILING", TRUE); +#endif + force_tiling = debug_get_bool_option("R600_TILING", FALSE); + } - if (force_tiling) { + if (force_tiling && permit_hardware_blit(screen, templ)) { if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && !(templ->bind & PIPE_BIND_SCANOUT)) { array_mode = V_038000_ARRAY_2D_TILED_THIN1; } } + if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && + util_format_is_compressed(templ->format)) + array_mode = V_038000_ARRAY_1D_TILED_THIN1; + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, 0, 0, NULL); } -static void r600_texture_destroy(struct pipe_screen *screen, - struct pipe_resource *ptex) -{ - struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; - struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; - - if (rtex->flushed_depth_texture) - pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); - - if (resource->bo) { - r600_bo_reference(radeon, &resource->bo, NULL); - } - FREE(rtex); -} - -static boolean r600_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *ptex, - struct winsys_handle *whandle) -{ - struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; - struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; - - return r600_bo_get_winsys_handle(radeon, resource->bo, - rtex->pitch_in_bytes[0], whandle); -} - static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_surface *surf_tmpl) { struct r600_resource_texture 
*rtex = (struct r600_resource_texture*)texture; struct r600_surface *surface = CALLOC_STRUCT(r600_surface); - unsigned tile_height; unsigned level = surf_tmpl->u.tex.level; assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); @@ -398,8 +455,8 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; surface->base.u.tex.level = level; - tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]); - surface->aligned_height = align(surface->base.height, tile_height); + surface->aligned_height = r600_texture_get_nblocksy(pipe->screen, + rtex, level); return &surface->base; } @@ -435,18 +492,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, bo); } -static unsigned int r600_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned level, int layer) -{ - /* FIXME */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); - int r600_texture_depth_flush(struct pipe_context *ctx, - struct pipe_resource *texture) + struct pipe_resource *texture, boolean just_create) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_resource resource; @@ -459,7 +506,8 @@ int r600_texture_depth_flush(struct pipe_context *ctx, resource.width0 = texture->width0; resource.height0 = texture->height0; resource.depth0 = 1; - resource.last_level = 0; + resource.array_size = 1; + resource.last_level = texture->last_level; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; resource.bind = 0; @@ -473,10 +521,14 @@ int r600_texture_depth_flush(struct pipe_context *ctx, return -ENOMEM; } + ((struct r600_resource_texture *)rtex->flushed_depth_texture)->is_flushing_texture = TRUE; out: + if (just_create) + return 0; + /* XXX: only do this if the depth texture 
has actually changed: */ - r600_blit_uncompress_depth_ptr(ctx, rtex); + r600_blit_uncompress_depth(ctx, rtex); return 0; } @@ -487,46 +539,6 @@ static INLINE unsigned u_box_volume( const struct pipe_box *box ) return box->width * box->depth * box->height; }; - -/* Figure out whether u_blitter will fallback to a transfer operation. - * If so, don't use a staging resource. - */ -static boolean permit_hardware_blit(struct pipe_screen *screen, - struct pipe_resource *res) -{ - unsigned bind; - - if (util_format_is_depth_or_stencil(res->format)) - bind = PIPE_BIND_DEPTH_STENCIL; - else - bind = PIPE_BIND_RENDER_TARGET; - - /* See r600_resource_copy_region: there is something wrong - * with depth resource copies at the moment so avoid them for - * now. - */ - if (util_format_get_component_bits(res->format, - UTIL_FORMAT_COLORSPACE_ZS, - 0) != 0) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - bind, 0)) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - PIPE_BIND_SAMPLER_VIEW, 0)) - return FALSE; - - return TRUE; -} - struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct pipe_resource *texture, unsigned level, @@ -546,7 +558,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, * the CPU is much happier reading out of cached system memory * than uncached VRAM. 
*/ - if (rtex->tiled) + if (R600_TEX_IS_TILED(rtex, level)) use_staging_texture = TRUE; if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024) @@ -579,13 +591,16 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, */ /* XXX: when discard is true, no need to read back from depth texture */ - r = r600_texture_depth_flush(ctx, texture); + r = r600_texture_depth_flush(ctx, texture, FALSE); if (r < 0) { R600_ERR("failed to create temporary texture to hold untiled copy\n"); pipe_resource_reference(&trans->transfer.resource, NULL); FREE(trans); return NULL; } + trans->transfer.stride = rtex->flushed_depth_texture->pitch_in_bytes[level]; + trans->offset = r600_texture_get_offset(rtex->flushed_depth_texture, level, box->z); + return &trans->transfer; } else if (use_staging_texture) { resource.target = PIPE_TEXTURE_2D; resource.format = texture->format; @@ -622,11 +637,12 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, if (usage & PIPE_TRANSFER_READ) { r600_copy_to_staging_texture(ctx, trans); /* Always referenced in the blit. 
*/ - ctx->flush(ctx, 0, NULL); + ctx->flush(ctx, NULL); } return &trans->transfer; } trans->transfer.stride = rtex->pitch_in_bytes[level]; + trans->transfer.layer_stride = rtex->layer_size[level]; trans->offset = r600_texture_get_offset(rtex, level, box->z); return &trans->transfer; } @@ -635,7 +651,8 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; + struct pipe_resource *texture = transfer->resource; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; if (rtransfer->staging_texture) { if (transfer->usage & PIPE_TRANSFER_WRITE) { @@ -643,9 +660,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, } pipe_resource_reference(&rtransfer->staging_texture, NULL); } - if (rtex->flushed_depth_texture) { - pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + + if (rtex->depth && !rtex->is_flushing_texture) { + if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtex->flushed_depth_texture) + r600_blit_push_depth(ctx, rtex); } + pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); } @@ -727,19 +747,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, r600_bo_unmap(radeon, bo); } -struct u_resource_vtbl r600_texture_vtbl = -{ - r600_texture_get_handle, /* get_handle */ - r600_texture_destroy, /* resource_destroy */ - r600_texture_is_referenced, /* is_resource_referenced */ - r600_texture_get_transfer, /* get_transfer */ - r600_texture_transfer_destroy, /* transfer_destroy */ - r600_texture_transfer_map, /* transfer_map */ - u_default_transfer_flush_region,/* transfer_flush_region */ - r600_texture_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ -}; - void r600_init_surface_functions(struct r600_pipe_context *r600) { 
r600->context.create_surface = r600_create_surface; @@ -795,13 +802,16 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, } /* texture format translate */ -uint32_t r600_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view, +uint32_t r600_translate_texformat(struct pipe_screen *screen, + enum pipe_format format, + const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p) { uint32_t result = 0, word4 = 0, yuv_format = 0; const struct util_format_description *desc; boolean uniform = TRUE; + static int r600_enable_s3tc = -1; + int i; const uint32_t sign_bit[4] = { S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED), @@ -850,37 +860,65 @@ uint32_t r600_translate_texformat(enum pipe_format format, break; } goto out_unknown; /* TODO */ - + case UTIL_FORMAT_COLORSPACE_SRGB: word4 |= S_038010_FORCE_DEGAMMA(1); - if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB) - goto out_unknown; /* fails for some reason - TODO */ break; default: break; } - /* S3TC formats. 
TODO */ - if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - static int r600_enable_s3tc = -1; + if (r600_enable_s3tc == -1) { + struct r600_screen *rscreen = (struct r600_screen *)screen; + if (r600_get_minor_version(rscreen->radeon) >= 9) + r600_enable_s3tc = 1; + else + r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + } + + if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + if (!r600_enable_s3tc) + goto out_unknown; - if (r600_enable_s3tc == -1) - r600_enable_s3tc = - debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + switch (format) { + case PIPE_FORMAT_RGTC1_SNORM: + word4 |= sign_bit[0]; + case PIPE_FORMAT_RGTC1_UNORM: + result = FMT_BC4; + goto out_word4; + case PIPE_FORMAT_RGTC2_SNORM: + word4 |= sign_bit[0] | sign_bit[1]; + case PIPE_FORMAT_RGTC2_UNORM: + result = FMT_BC5; + goto out_word4; + default: + goto out_unknown; + } + } + + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { if (!r600_enable_s3tc) goto out_unknown; + if (!util_format_s3tc_enabled) { + goto out_unknown; + } + switch (format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: result = FMT_BC1; goto out_word4; case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: result = FMT_BC2; goto out_word4; case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT5_SRGBA: result = FMT_BC3; goto out_word4; default: @@ -889,7 +927,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, } - for (i = 0; i < desc->nr_channels; i++) { + for (i = 0; i < desc->nr_channels; i++) { if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { word4 |= sign_bit[i]; } @@ -897,13 +935,11 @@ uint32_t r600_translate_texformat(enum pipe_format format, /* R8G8Bx_SNORM - TODO CxV8U8 */ - /* RGTC - TODO */ - /* See whether the components are of the same size. */ for (i = 1; i < desc->nr_channels; i++) { uniform = uniform && desc->channel[0].size == desc->channel[i].size; } - + /* Non-uniform formats. 
*/ if (!uniform) { switch(desc->nr_channels) { @@ -927,7 +963,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, desc->channel[1].size == 10 && desc->channel[2].size == 10 && desc->channel[3].size == 2) { - result = FMT_10_10_10_2; + result = FMT_2_10_10_10; goto out_word4; } goto out_unknown; @@ -990,6 +1026,19 @@ uint32_t r600_translate_texformat(enum pipe_format format, result = FMT_16_16_16_16; goto out_word4; } + goto out_unknown; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32; + goto out_word4; + case 2: + result = FMT_32_32; + goto out_word4; + case 4: + result = FMT_32_32_32_32; + goto out_word4; + } } goto out_unknown; @@ -1021,7 +1070,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, goto out_word4; } } - + } out_word4: if (word4_p) diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index 1c227d3215..7482d15e12 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -22,181 +22,34 @@ * * Authors: Dave Airlie <airlied@redhat.com> */ -#include "translate/translate_cache.h" -#include "translate/translate.h" -#include <pipebuffer/pb_buffer.h> + #include <util/u_index_modify.h> +#include "util/u_inlines.h" +#include "util/u_upload_mgr.h" #include "r600_pipe.h" -void r600_begin_vertex_translate(struct r600_pipe_context *rctx) -{ - struct pipe_context *pipe = &rctx->context; - struct translate_key key = {0}; - struct translate_element *te; - unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; - struct translate *tr; - struct r600_vertex_element *ve = rctx->vertex_elements; - boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; - struct pipe_resource *out_buffer; - unsigned i, num_verts; - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - - /* Initialize the translate key, i.e. 
the recipe how vertices should be - * translated. */ - for (i = 0; i < ve->count; i++) { - struct pipe_vertex_buffer *vb = - &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index]; - enum pipe_format output_format = ve->hw_format[i]; - unsigned output_format_size = ve->hw_format_size[i]; - - /* Check for support. */ - if (ve->elements[i].src_format == ve->hw_format[i]) { - continue; - } - - /* Workaround for translate: output floats instead of halfs. */ - switch (output_format) { - case PIPE_FORMAT_R16_FLOAT: - output_format = PIPE_FORMAT_R32_FLOAT; - output_format_size = 4; - break; - case PIPE_FORMAT_R16G16_FLOAT: - output_format = PIPE_FORMAT_R32G32_FLOAT; - output_format_size = 8; - break; - case PIPE_FORMAT_R16G16B16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - output_format_size = 12; - break; - case PIPE_FORMAT_R16G16B16A16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - output_format_size = 16; - break; - default:; - } - - /* Add this vertex element. */ - te = &key.element[key.nr_elements]; - /*te->type; - te->instance_divisor;*/ - te->input_buffer = ve->elements[i].vertex_buffer_index; - te->input_format = ve->elements[i].src_format; - te->input_offset = vb->buffer_offset + ve->elements[i].src_offset; - te->output_format = output_format; - te->output_offset = key.output_stride; - - key.output_stride += output_format_size; - vb_translated[ve->elements[i].vertex_buffer_index] = TRUE; - tr_elem_index[i] = key.nr_elements; - key.nr_elements++; - } - - /* Get a translate object. */ - tr = translate_cache_find(rctx->tran.translate_cache, &key); - - /* Map buffers we want to translate. */ - for (i = 0; i < rctx->nvertex_buffer; i++) { - if (vb_translated[i]) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - vb_map[i] = pipe_buffer_map(pipe, vb->buffer, - PIPE_TRANSFER_READ, &vb_transfer[i]); - - tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); - } - } - - /* Create and map the output buffer. 
*/ - num_verts = rctx->vb_max_index + 1; - - out_buffer = pipe_buffer_create(&rctx->screen->screen, - PIPE_BIND_VERTEX_BUFFER, - key.output_stride * num_verts); - - out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, - &out_transfer); - - /* Translate. */ - tr->run(tr, 0, num_verts, 0, out_map); - - /* Unmap all buffers. */ - for (i = 0; i < rctx->nvertex_buffer; i++) { - if (vb_translated[i]) { - pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer, - vb_transfer[i]); - } - } - - pipe_buffer_unmap(pipe, out_buffer, out_transfer); - - /* Setup the new vertex buffer in the first free slot. */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - if (!vb->buffer) { - pipe_resource_reference(&vb->buffer, out_buffer); - vb->buffer_offset = 0; - vb->max_index = num_verts - 1; - vb->stride = key.output_stride; - rctx->tran.vb_slot = i; - break; - } - } - - /* Save and replace vertex elements. */ - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->elements[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->elements[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->elements[i], - sizeof(struct pipe_vertex_element)); - } - } - - rctx->tran.new_velems = pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); - - pipe_resource_reference(&out_buffer, NULL); -} - -void r600_end_vertex_translate(struct r600_pipe_context *rctx) -{ - struct pipe_context *pipe = &rctx->context; - - if (rctx->tran.new_velems == NULL) { - return; - } - /* Restore vertex elements. 
*/ - if (rctx->vertex_elements == rctx->tran.new_velems) { - pipe->bind_vertex_elements_state(pipe, NULL); - } - pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); - rctx->tran.new_velems = NULL; - - /* Delete the now-unused VBO. */ - pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, - NULL); -} void r600_translate_index_buffer(struct r600_pipe_context *r600, - struct pipe_resource **index_buffer, - unsigned *index_size, - unsigned *start, unsigned count) + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count) { + struct pipe_resource *out_buffer = NULL; + unsigned out_offset; + void *ptr; + boolean flushed; + switch (*index_size) { case 1: - util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); + u_upload_alloc(r600->vbuf_mgr->uploader, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_shorten_ubyte_elts_to_userptr( + &r600->context, *index_buffer, 0, *start, count, ptr); + + pipe_resource_reference(index_buffer, out_buffer); *index_size = 2; - *start = 0; - break; - case 2: - case 4: + *start = out_offset / 2; break; } } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index ae19bfb828..df70e2889e 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -67,6 +67,10 @@ #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define PREDICATION_OP_CLEAR 0x0 +#define PREDICATION_OP_ZPASS 0x1 +#define PREDICATION_OP_PRIMCOUNT 0x2 + #define PKT_TYPE_S(x) (((x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) #define PKT_TYPE_C 0x3FFFFFFF @@ -248,6 +252,8 @@ #define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27) #define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1) #define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF +#define V_0280A0_EXPORT_FULL 0 +#define V_0280A0_EXPORT_NORM 1 #define R_028060_CB_COLOR0_SIZE 0x028060 #define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) #define 
G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) @@ -960,8 +966,8 @@ #define S_038010_SRF_MODE_ALL(x) (((x) & 0x1) << 10) #define G_038010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1) #define C_038010_SRF_MODE_ALL 0xFFFFFBFF -#define V_038010_SFR_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000 -#define V_038010_SFR_MODE_NO_ZERO 0x00000001 +#define V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000 +#define V_038010_SRF_MODE_NO_ZERO 0x00000001 #define S_038010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11) #define G_038010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1) #define C_038010_FORCE_DEGAMMA 0xFFFFF7FF @@ -2332,31 +2338,6 @@ #define R_0280D4_CB_COLOR5_TILE 0x0280D4 #define R_0280D8_CB_COLOR6_TILE 0x0280D8 #define R_0280DC_CB_COLOR7_TILE 0x0280DC -#define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD -#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028808_DITHER_ENABLE 0xFFFFFFFB -#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) -#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) -#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 -#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4) -#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7) -#define C_028808_SPECIAL_OP 0xFFFFFF8F -#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7) -#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1) -#define C_028808_PER_MRT_BLEND 0xFFFFFF7F -#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8) -#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF) -#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF -#define S_028808_ROP3(x) (((x) & 0xFF) << 16) -#define G_028808_ROP3(x) (((x) >> 16) & 0xFF) -#define C_028808_ROP3 0xFF00FFFF #define R_028614_SPI_VS_OUT_ID_0 
0x028614 #define S_028614_SEMANTIC_0(x) (((x) & 0xFF) << 0) #define G_028614_SEMANTIC_0(x) (((x) >> 0) & 0xFF) diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 892dee86ba..b3c7d1494f 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -26,11 +26,18 @@ #include "r600_asm.h" #include "r700_sq.h" +void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +{ + unsigned count = (cf->ndw / 4) - 1; + *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); + *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count) | + S_SQ_CF_WORD1_COUNT_3(count >> 3); +} int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { - unsigned i; - bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | @@ -61,18 +68,11 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } - if (alu->last) { - if (alu->nliteral && !alu->literal_added) { - R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); - } - for (i = 0; i < alu->nliteral; i++) { - bc->bytecode[id++] = alu->value[i]; - } - } return 0; } diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 94e57e40f8..6e2d6ba1ef 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -44,7 +44,10 @@ rbug_destroy(struct pipe_context *_pipe) struct pipe_context 
*pipe = rb_pipe->pipe; remove_from_list(&rb_pipe->list); + pipe_mutex_lock(rb_pipe->call_mutex); pipe->destroy(pipe); + rb_pipe->pipe = NULL; + pipe_mutex_unlock(rb_pipe->call_mutex); FREE(rb_pipe); } @@ -112,7 +115,12 @@ rbug_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) pipe_mutex_lock(rb_pipe->draw_mutex); rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); - pipe->draw_vbo(pipe, info); + pipe_mutex_lock(rb_pipe->call_mutex); + if (!(rb_pipe->curr.fs && rb_pipe->curr.fs->disabled) && + !(rb_pipe->curr.gs && rb_pipe->curr.gs->disabled) && + !(rb_pipe->curr.vs && rb_pipe->curr.vs->disabled)) + pipe->draw_vbo(pipe, info); + pipe_mutex_unlock(rb_pipe->call_mutex); rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); pipe_mutex_unlock(rb_pipe->draw_mutex); @@ -125,8 +133,10 @@ rbug_create_query(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); return pipe->create_query(pipe, query_type); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -136,8 +146,10 @@ rbug_destroy_query(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->destroy_query(pipe, query); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -147,8 +159,10 @@ rbug_begin_query(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->begin_query(pipe, query); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -158,8 +172,10 @@ rbug_end_query(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->end_query(pipe, query); + pipe_mutex_unlock(rb_pipe->call_mutex); } static boolean @@ -170,11 +186,16 @@ 
rbug_get_query_result(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + boolean ret; - return pipe->get_query_result(pipe, - query, - wait, - result); + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->get_query_result(pipe, + query, + wait, + result); + pipe_mutex_unlock(rb_pipe->call_mutex); + + return ret; } static void * @@ -183,9 +204,14 @@ rbug_create_blend_state(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + void *ret; + + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->create_blend_state(pipe, + blend); + pipe_mutex_unlock(rb_pipe->call_mutex); - return pipe->create_blend_state(pipe, - blend); + return ret; } static void @@ -195,8 +221,10 @@ rbug_bind_blend_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_blend_state(pipe, - blend); + blend); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -206,8 +234,10 @@ rbug_delete_blend_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->delete_blend_state(pipe, blend); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -216,9 +246,14 @@ rbug_create_sampler_state(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + void *ret; + + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->create_sampler_state(pipe, + sampler); + pipe_mutex_unlock(rb_pipe->call_mutex); - return pipe->create_sampler_state(pipe, - sampler); + return ret; } static void @@ -229,9 +264,11 @@ rbug_bind_fragment_sampler_states(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; 
+ pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_fragment_sampler_states(pipe, num_samplers, samplers); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -242,9 +279,11 @@ rbug_bind_vertex_sampler_states(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_vertex_sampler_states(pipe, num_samplers, samplers); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -254,8 +293,10 @@ rbug_delete_sampler_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->delete_sampler_state(pipe, sampler); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -264,9 +305,14 @@ rbug_create_rasterizer_state(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + void *ret; - return pipe->create_rasterizer_state(pipe, - rasterizer); + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->create_rasterizer_state(pipe, + rasterizer); + pipe_mutex_unlock(rb_pipe->call_mutex); + + return ret; } static void @@ -276,8 +322,10 @@ rbug_bind_rasterizer_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_rasterizer_state(pipe, rasterizer); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -287,8 +335,10 @@ rbug_delete_rasterizer_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->delete_rasterizer_state(pipe, rasterizer); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -297,9 +347,14 @@ rbug_create_depth_stencil_alpha_state(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = 
rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + void *ret; + + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->create_depth_stencil_alpha_state(pipe, + depth_stencil_alpha); + pipe_mutex_unlock(rb_pipe->call_mutex); - return pipe->create_depth_stencil_alpha_state(pipe, - depth_stencil_alpha); + return ret; } static void @@ -309,8 +364,10 @@ rbug_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_alpha); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -320,8 +377,10 @@ rbug_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->delete_depth_stencil_alpha_state(pipe, depth_stencil_alpha); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -332,7 +391,10 @@ rbug_create_fs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = pipe->create_fs_state(pipe, state); + pipe_mutex_unlock(rb_pipe->call_mutex); + if (!result) return NULL; @@ -347,10 +409,14 @@ rbug_bind_fs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *fs; + pipe_mutex_lock(rb_pipe->call_mutex); + fs = rbug_shader_unwrap(_fs); rb_pipe->curr.fs = rbug_shader(_fs); pipe->bind_fs_state(pipe, fs); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -360,7 +426,9 @@ rbug_delete_fs_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct rbug_shader *rb_shader = rbug_shader(_fs); + pipe_mutex_lock(rb_pipe->call_mutex); rbug_shader_destroy(rb_pipe, rb_shader); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -371,7 +439,10 @@ rbug_create_vs_state(struct 
pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = pipe->create_vs_state(pipe, state); + pipe_mutex_unlock(rb_pipe->call_mutex); + if (!result) return NULL; @@ -386,10 +457,14 @@ rbug_bind_vs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *vs; + pipe_mutex_lock(rb_pipe->call_mutex); + vs = rbug_shader_unwrap(_vs); rb_pipe->curr.vs = rbug_shader(_vs); pipe->bind_vs_state(pipe, vs); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -399,7 +474,9 @@ rbug_delete_vs_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct rbug_shader *rb_shader = rbug_shader(_vs); + pipe_mutex_lock(rb_pipe->call_mutex); rbug_shader_destroy(rb_pipe, rb_shader); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -410,7 +487,10 @@ rbug_create_gs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = pipe->create_gs_state(pipe, state); + pipe_mutex_unlock(rb_pipe->call_mutex); + if (!result) return NULL; @@ -425,10 +505,14 @@ rbug_bind_gs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *gs; + pipe_mutex_lock(rb_pipe->call_mutex); + gs = rbug_shader_unwrap(_gs); rb_pipe->curr.gs = rbug_shader(_gs); pipe->bind_gs_state(pipe, gs); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -438,7 +522,9 @@ rbug_delete_gs_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct rbug_shader *rb_shader = rbug_shader(_gs); + pipe_mutex_lock(rb_pipe->call_mutex); rbug_shader_destroy(rb_pipe, rb_shader); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -448,10 +534,15 @@ rbug_create_vertex_elements_state(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + void *ret; - return
pipe->create_vertex_elements_state(pipe, + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->create_vertex_elements_state(pipe, num_elements, vertex_elements); + pipe_mutex_unlock(rb_pipe->call_mutex); + + return ret; } static void @@ -461,8 +552,10 @@ rbug_bind_vertex_elements_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_vertex_elements_state(pipe, velems); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -472,8 +565,10 @@ rbug_delete_vertex_elements_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->delete_vertex_elements_state(pipe, velems); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -483,8 +578,10 @@ rbug_set_blend_color(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_blend_color(pipe, blend_color); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -494,8 +591,10 @@ rbug_set_stencil_ref(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_stencil_ref(pipe, stencil_ref); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -505,8 +604,10 @@ rbug_set_clip_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_clip_state(pipe, clip); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -526,10 +627,12 @@ rbug_set_constant_buffer(struct pipe_context *_pipe, resource = unwrapped_resource; } + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_constant_buffer(pipe, shader, index, resource); + 
pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -542,8 +645,12 @@ rbug_set_framebuffer_state(struct pipe_context *_pipe, struct pipe_framebuffer_state *state = NULL; unsigned i; + /* must protect curr status */ + pipe_mutex_lock(rb_pipe->call_mutex); + rb_pipe->curr.nr_cbufs = 0; memset(rb_pipe->curr.cbufs, 0, sizeof(rb_pipe->curr.cbufs)); + rb_pipe->curr.zsbuf = NULL; /* unwrap the input state */ if (_state) { @@ -556,11 +663,15 @@ rbug_set_framebuffer_state(struct pipe_context *_pipe, rb_pipe->curr.cbufs[i] = rbug_resource(_state->cbufs[i]->texture); } unwrapped_state.zsbuf = rbug_surface_unwrap(_state->zsbuf); + if (_state->zsbuf) + rb_pipe->curr.zsbuf = rbug_resource(_state->zsbuf->texture); state = &unwrapped_state; } pipe->set_framebuffer_state(pipe, state); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -570,8 +681,10 @@ rbug_set_polygon_stipple(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_polygon_stipple(pipe, poly_stipple); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -581,8 +694,10 @@ rbug_set_scissor_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_scissor_state(pipe, scissor); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -592,8 +707,10 @@ rbug_set_viewport_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_viewport_state(pipe, viewport); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -607,6 +724,9 @@ rbug_set_fragment_sampler_views(struct pipe_context *_pipe, struct pipe_sampler_view **views = NULL; unsigned i; + /* must protect curr status */ + pipe_mutex_lock(rb_pipe->call_mutex); + rb_pipe->curr.num_fs_views = 
0; memset(rb_pipe->curr.fs_views, 0, sizeof(rb_pipe->curr.fs_views)); memset(rb_pipe->curr.fs_texs, 0, sizeof(rb_pipe->curr.fs_texs)); @@ -623,6 +743,8 @@ rbug_set_fragment_sampler_views(struct pipe_context *_pipe, } pipe->set_fragment_sampler_views(pipe, num, views); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -636,6 +758,9 @@ rbug_set_vertex_sampler_views(struct pipe_context *_pipe, struct pipe_sampler_view **views = NULL; unsigned i; + /* must protect curr status */ + pipe_mutex_lock(rb_pipe->call_mutex); + rb_pipe->curr.num_vs_views = 0; memset(rb_pipe->curr.vs_views, 0, sizeof(rb_pipe->curr.vs_views)); memset(rb_pipe->curr.vs_texs, 0, sizeof(rb_pipe->curr.vs_texs)); @@ -652,6 +777,8 @@ rbug_set_vertex_sampler_views(struct pipe_context *_pipe, } pipe->set_vertex_sampler_views(pipe, num, views); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -665,6 +792,8 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe, struct pipe_vertex_buffer *buffers = NULL; unsigned i; + pipe_mutex_lock(rb_pipe->call_mutex); + if (num_buffers) { memcpy(unwrapped_buffers, _buffers, num_buffers * sizeof(*_buffers)); for (i = 0; i < num_buffers; i++) @@ -675,6 +804,8 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe, pipe->set_vertex_buffers(pipe, num_buffers, buffers); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -691,7 +822,9 @@ rbug_set_index_buffer(struct pipe_context *_pipe, ib = &unwrapped_ib; } + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_index_buffer(pipe, ib); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -701,7 +834,9 @@ rbug_set_sample_mask(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_sample_mask(pipe, sample_mask); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -722,6 +857,7 @@ rbug_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst = 
rb_resource_dst->resource; struct pipe_resource *src = rb_resource_src->resource; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->resource_copy_region(pipe, dst, dst_level, @@ -731,6 +867,7 @@ rbug_resource_copy_region(struct pipe_context *_pipe, src, src_level, src_box); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -743,11 +880,13 @@ rbug_clear(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->clear(pipe, buffers, rgba, depth, stencil); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -762,6 +901,7 @@ rbug_clear_render_target(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; struct pipe_surface *dst = rb_surface_dst->surface; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->clear_render_target(pipe, dst, rgba, @@ -769,6 +909,7 @@ rbug_clear_render_target(struct pipe_context *_pipe, dsty, width, height); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -785,6 +926,7 @@ rbug_clear_depth_stencil(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; struct pipe_surface *dst = rb_surface_dst->surface; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->clear_depth_stencil(pipe, dst, clear_flags, @@ -794,36 +936,20 @@ rbug_clear_depth_stencil(struct pipe_context *_pipe, dsty, width, height); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void rbug_flush(struct pipe_context *_pipe, - unsigned flags, struct pipe_fence_handle **fence) { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->flush(pipe, - flags, fence); -} - -static unsigned int -rbug_is_resource_referenced(struct pipe_context *_pipe, - struct pipe_resource *_resource, - unsigned level, - int layer) -{ - struct rbug_context *rb_pipe = rbug_context(_pipe); - struct rbug_resource *rb_resource = rbug_resource(_resource); - struct pipe_context 
*pipe = rb_pipe->pipe; - struct pipe_resource *resource = rb_resource->resource; - - return pipe->is_resource_referenced(pipe, - resource, - level, - layer); + pipe_mutex_unlock(rb_pipe->call_mutex); } static struct pipe_sampler_view * @@ -837,9 +963,11 @@ rbug_context_create_sampler_view(struct pipe_context *_pipe, struct pipe_resource *resource = rb_resource->resource; struct pipe_sampler_view *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = pipe->create_sampler_view(pipe, resource, templ); + pipe_mutex_unlock(rb_pipe->call_mutex); if (result) return rbug_sampler_view_create(rb_pipe, rb_resource, result); @@ -865,9 +993,11 @@ rbug_context_create_surface(struct pipe_context *_pipe, struct pipe_resource *resource = rb_resource->resource; struct pipe_surface *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = pipe->create_surface(pipe, resource, surf_tmpl); + pipe_mutex_unlock(rb_pipe->call_mutex); if (result) return rbug_surface_create(rb_pipe, rb_resource, result); @@ -878,8 +1008,13 @@ static void rbug_context_surface_destroy(struct pipe_context *_pipe, struct pipe_surface *_surface) { - rbug_surface_destroy(rbug_context(_pipe), - rbug_surface(_surface)); + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct rbug_surface *rb_surface = rbug_surface(_surface); + + pipe_mutex_lock(rb_pipe->call_mutex); + rbug_surface_destroy(rb_pipe, + rb_surface); + pipe_mutex_unlock(rb_pipe->call_mutex); } @@ -897,11 +1032,13 @@ rbug_context_get_transfer(struct pipe_context *_context, struct pipe_resource *resource = rb_resource->resource; struct pipe_transfer *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = context->get_transfer(context, resource, level, usage, box); + pipe_mutex_unlock(rb_pipe->call_mutex); if (result) return rbug_transfer_create(rb_pipe, rb_resource, result); @@ -912,8 +1049,13 @@ static void rbug_context_transfer_destroy(struct pipe_context *_pipe, struct pipe_transfer *_transfer) { - rbug_transfer_destroy(rbug_context(_pipe), 
- rbug_transfer(_transfer)); + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct rbug_transfer *rb_transfer =rbug_transfer(_transfer); + + pipe_mutex_lock(rb_pipe->call_mutex); + rbug_transfer_destroy(rb_pipe, + rb_transfer); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -924,9 +1066,14 @@ rbug_context_transfer_map(struct pipe_context *_context, struct rbug_transfer *rb_transfer = rbug_transfer(_transfer); struct pipe_context *context = rb_pipe->pipe; struct pipe_transfer *transfer = rb_transfer->transfer; + void *ret; - return context->transfer_map(context, + pipe_mutex_lock(rb_pipe->call_mutex); + ret = context->transfer_map(context, transfer); + pipe_mutex_unlock(rb_pipe->call_mutex); + + return ret; } @@ -941,9 +1088,11 @@ rbug_context_transfer_flush_region(struct pipe_context *_context, struct pipe_context *context = rb_pipe->pipe; struct pipe_transfer *transfer = rb_transfer->transfer; + pipe_mutex_lock(rb_pipe->call_mutex); context->transfer_flush_region(context, transfer, box); + pipe_mutex_unlock(rb_pipe->call_mutex); } @@ -956,8 +1105,10 @@ rbug_context_transfer_unmap(struct pipe_context *_context, struct pipe_context *context = rb_pipe->pipe; struct pipe_transfer *transfer = rb_transfer->transfer; + pipe_mutex_lock(rb_pipe->call_mutex); context->transfer_unmap(context, transfer); + pipe_mutex_unlock(rb_pipe->call_mutex); } @@ -976,6 +1127,7 @@ rbug_context_transfer_inline_write(struct pipe_context *_context, struct pipe_context *context = rb_pipe->pipe; struct pipe_resource *resource = rb_resource->resource; + pipe_mutex_lock(rb_pipe->call_mutex); context->transfer_inline_write(context, resource, level, @@ -984,6 +1136,22 @@ rbug_context_transfer_inline_write(struct pipe_context *_context, data, stride, layer_stride); + pipe_mutex_unlock(rb_pipe->call_mutex); +} + + +static void rbug_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct rbug_context 
*rb_pipe = rbug_context(_context); + struct rbug_resource *rb_resource = rbug_resource(_resource); + struct pipe_context *context = rb_pipe->pipe; + struct pipe_resource *resource = rb_resource->resource; + + pipe_mutex_lock(rb_pipe->call_mutex); + context->redefine_user_buffer(context, resource, offset, size); + pipe_mutex_unlock(rb_pipe->call_mutex); } @@ -1061,7 +1229,6 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.clear_render_target = rbug_clear_render_target; rb_pipe->base.clear_depth_stencil = rbug_clear_depth_stencil; rb_pipe->base.flush = rbug_flush; - rb_pipe->base.is_resource_referenced = rbug_is_resource_referenced; rb_pipe->base.create_sampler_view = rbug_context_create_sampler_view; rb_pipe->base.sampler_view_destroy = rbug_context_sampler_view_destroy; rb_pipe->base.create_surface = rbug_context_create_surface; @@ -1072,6 +1239,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.transfer_unmap = rbug_context_transfer_unmap; rb_pipe->base.transfer_flush_region = rbug_context_transfer_flush_region; rb_pipe->base.transfer_inline_write = rbug_context_transfer_inline_write; + rb_pipe->base.redefine_user_buffer = rbug_redefine_user_buffer; rb_pipe->pipe = pipe; diff --git a/src/gallium/drivers/rbug/rbug_core.c b/src/gallium/drivers/rbug/rbug_core.c index eb772d19d0..b80bcd4228 100644 --- a/src/gallium/drivers/rbug/rbug_core.c +++ b/src/gallium/drivers/rbug/rbug_core.c @@ -498,7 +498,7 @@ rbug_context_flush(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32 /* protect the pipe context */ pipe_mutex_lock(rb_context->call_mutex); - rb_context->pipe->flush(rb_context->pipe, flush->flags, NULL); + rb_context->pipe->flush(rb_context->pipe, NULL); pipe_mutex_unlock(rb_context->call_mutex); pipe_mutex_unlock(rb_screen->list_mutex); diff --git a/src/gallium/drivers/rbug/rbug_objects.c b/src/gallium/drivers/rbug/rbug_objects.c index 7d7cc482ae..15f5db4009 100644 --- 
a/src/gallium/drivers/rbug/rbug_objects.c +++ b/src/gallium/drivers/rbug/rbug_objects.c @@ -98,8 +98,9 @@ rbug_surface_create(struct rbug_context *rb_context, pipe_reference_init(&rb_surface->base.reference, 1); rb_surface->base.texture = NULL; + rb_surface->base.context = &rb_context->base; + rb_surface->surface = surface; /* we own the surface already */ pipe_resource_reference(&rb_surface->base.texture, &rb_resource->base); - rb_surface->surface = surface; return &rb_surface->base; @@ -113,8 +114,7 @@ rbug_surface_destroy(struct rbug_context *rb_context, struct rbug_surface *rb_surface) { pipe_resource_reference(&rb_surface->base.texture, NULL); - rb_context->pipe->surface_destroy(rb_context->pipe, - rb_surface->surface); + pipe_surface_reference(&rb_surface->surface, NULL); FREE(rb_surface); } diff --git a/src/gallium/drivers/rbug/rbug_screen.c b/src/gallium/drivers/rbug/rbug_screen.c index d635ce575c..7c8dfdcc12 100644 --- a/src/gallium/drivers/rbug/rbug_screen.c +++ b/src/gallium/drivers/rbug/rbug_screen.c @@ -106,8 +106,7 @@ rbug_screen_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { struct rbug_screen *rb_screen = rbug_screen(_screen); struct pipe_screen *screen = rb_screen->screen; @@ -116,8 +115,7 @@ rbug_screen_is_format_supported(struct pipe_screen *_screen, format, target, sample_count, - tex_usage, - geom_flags); + tex_usage); } static struct pipe_context * @@ -240,30 +238,28 @@ rbug_screen_fence_reference(struct pipe_screen *_screen, fence); } -static int +static boolean rbug_screen_fence_signalled(struct pipe_screen *_screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct rbug_screen *rb_screen = rbug_screen(_screen); struct pipe_screen *screen = rb_screen->screen; return screen->fence_signalled(screen, - fence, - flags); + fence); } -static int 
+static boolean rbug_screen_fence_finish(struct pipe_screen *_screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct rbug_screen *rb_screen = rbug_screen(_screen); struct pipe_screen *screen = rb_screen->screen; return screen->fence_finish(screen, fence, - flags); + timeout); } boolean diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index e935ce6d21..ce22f64622 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -91,10 +91,17 @@ softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); - softpipe->quad.pstipple->destroy( softpipe->quad.pstipple ); + if (softpipe->quad.shade) + softpipe->quad.shade->destroy( softpipe->quad.shade ); + + if (softpipe->quad.depth_test) + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + + if (softpipe->quad.blend) + softpipe->quad.blend->destroy( softpipe->quad.blend ); + + if (softpipe->quad.pstipple) + softpipe->quad.pstipple->destroy( softpipe->quad.pstipple ); for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { sp_destroy_tile_cache(softpipe->cbuf_cache[i]); @@ -105,8 +112,8 @@ softpipe_destroy( struct pipe_context *pipe ) pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - sp_destroy_tex_tile_cache(softpipe->tex_cache[i]); - pipe_sampler_view_reference(&softpipe->sampler_views[i], NULL); + sp_destroy_tex_tile_cache(softpipe->fragment_tex_cache[i]); + pipe_sampler_view_reference(&softpipe->fragment_sampler_views[i], NULL); } for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { @@ -129,6 +136,10 @@ softpipe_destroy( struct pipe_context *pipe ) } } + for (i = 0; i < softpipe->num_vertex_buffers; i++) { + 
pipe_resource_reference(&softpipe->vertex_buffer[i].buffer, NULL); + } + tgsi_exec_machine_destroy(softpipe->fs_machine); FREE( softpipe ); @@ -137,13 +148,13 @@ softpipe_destroy( struct pipe_context *pipe ) /** * if (the texture is being used as a framebuffer surface) - * return PIPE_REFERENCED_FOR_WRITE + * return SP_REFERENCED_FOR_WRITE * else if (the texture is a bound texture source) - * return PIPE_REFERENCED_FOR_READ + * return SP_REFERENCED_FOR_READ * else - * return PIPE_UNREFERENCED + * return SP_UNREFERENCED */ -static unsigned int +unsigned int softpipe_is_resource_referenced( struct pipe_context *pipe, struct pipe_resource *texture, unsigned level, int layer) @@ -152,40 +163,40 @@ softpipe_is_resource_referenced( struct pipe_context *pipe, unsigned i; if (texture->target == PIPE_BUFFER) - return PIPE_UNREFERENCED; + return SP_UNREFERENCED; /* check if any of the bound drawing surfaces are this texture */ if (softpipe->dirty_render_cache) { for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { if (softpipe->framebuffer.cbufs[i] && softpipe->framebuffer.cbufs[i]->texture == texture) { - return PIPE_REFERENCED_FOR_WRITE; + return SP_REFERENCED_FOR_WRITE; } } if (softpipe->framebuffer.zsbuf && softpipe->framebuffer.zsbuf->texture == texture) { - return PIPE_REFERENCED_FOR_WRITE; + return SP_REFERENCED_FOR_WRITE; } } /* check if any of the tex_cache textures are this texture */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - if (softpipe->tex_cache[i] && - softpipe->tex_cache[i]->texture == texture) - return PIPE_REFERENCED_FOR_READ; + if (softpipe->fragment_tex_cache[i] && + softpipe->fragment_tex_cache[i]->texture == texture) + return SP_REFERENCED_FOR_READ; } for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { if (softpipe->vertex_tex_cache[i] && softpipe->vertex_tex_cache[i]->texture == texture) - return PIPE_REFERENCED_FOR_READ; + return SP_REFERENCED_FOR_READ; } for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { if (softpipe->geometry_tex_cache[i] && 
softpipe->geometry_tex_cache[i]->texture == texture) - return PIPE_REFERENCED_FOR_READ; + return SP_REFERENCED_FOR_READ; } - return PIPE_UNREFERENCED; + return SP_UNREFERENCED; } @@ -219,7 +230,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->use_sse = FALSE; #endif - softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE ); softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE ); softpipe->pipe.winsys = NULL; @@ -244,9 +255,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.draw_stream_output = softpipe_draw_stream_output; softpipe->pipe.clear = softpipe_clear; - softpipe->pipe.flush = softpipe_flush; - - softpipe->pipe.is_resource_referenced = softpipe_is_resource_referenced; + softpipe->pipe.flush = softpipe_flush_wrapped; softpipe->pipe.render_condition = softpipe_render_condition; @@ -258,13 +267,22 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->cbuf_cache[i] = sp_create_tile_cache( &softpipe->pipe ); softpipe->zsbuf_cache = sp_create_tile_cache( &softpipe->pipe ); - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + softpipe->fragment_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); + if (!softpipe->fragment_tex_cache[i]) + goto fail; + } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); + if (!softpipe->vertex_tex_cache[i]) + goto fail; } + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { softpipe->geometry_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); + if (!softpipe->geometry_tex_cache[i]) + goto fail; } softpipe->fs_machine = tgsi_exec_machine_create(); @@ -295,7 +313,7 @@ softpipe_create_context( struct pipe_screen *screen, (struct tgsi_sampler **) softpipe->tgsi.geom_samplers_list); - 
if (debug_get_bool_option( "SP_NO_RAST", FALSE )) + if (debug_get_bool_option( "SOFTPIPE_NO_RAST", FALSE )) softpipe->no_rast = TRUE; softpipe->vbuf_backend = sp_create_vbuf_backend(softpipe); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 9361a3df09..a572ee8cf0 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -58,7 +58,7 @@ struct softpipe_context { /** Constant state objects */ struct pipe_blend_state *blend; - struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *fragment_samplers[PIPE_MAX_SAMPLERS]; struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_sampler_state *geometry_samplers[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_depth_stencil_alpha_state *depth_stencil; @@ -77,7 +77,7 @@ struct softpipe_context { struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; - struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_sampler_view *geometry_sampler_views[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_viewport_state viewport; @@ -91,8 +91,8 @@ struct softpipe_context { } so_target; struct pipe_query_data_so_statistics so_stats; - unsigned num_samplers; - unsigned num_sampler_views; + unsigned num_fragment_samplers; + unsigned num_fragment_sampler_views; unsigned num_vertex_samplers; unsigned num_vertex_sampler_views; unsigned num_geometry_samplers; @@ -154,9 +154,9 @@ struct softpipe_context { /** TGSI exec things */ struct { - struct sp_sampler_varient *geom_samplers_list[PIPE_MAX_GEOMETRY_SAMPLERS]; - struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS]; - struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_sampler_variant 
*geom_samplers_list[PIPE_MAX_GEOMETRY_SAMPLERS]; + struct sp_sampler_variant *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS]; + struct sp_sampler_variant *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; struct tgsi_exec_machine *fs_machine; @@ -174,7 +174,7 @@ struct softpipe_context { struct softpipe_tile_cache *zsbuf_cache; unsigned tex_timestamp; - struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + struct softpipe_tex_tile_cache *fragment_tex_cache[PIPE_MAX_SAMPLERS]; struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS]; struct softpipe_tex_tile_cache *geometry_tex_cache[PIPE_MAX_GEOMETRY_SAMPLERS]; @@ -192,10 +192,19 @@ softpipe_context( struct pipe_context *pipe ) } void -softpipe_reset_sampler_varients(struct softpipe_context *softpipe); +softpipe_reset_sampler_variants(struct softpipe_context *softpipe); struct pipe_context * softpipe_create_context( struct pipe_screen *, void *priv ); +#define SP_UNREFERENCED 0 +#define SP_REFERENCED_FOR_READ (1 << 0) +#define SP_REFERENCED_FOR_WRITE (1 << 1) + +unsigned int +softpipe_is_resource_referenced( struct pipe_context *pipe, + struct pipe_resource *texture, + unsigned level, int layer); + #endif /* SP_CONTEXT_H */ diff --git a/src/gallium/drivers/softpipe/sp_fence.c b/src/gallium/drivers/softpipe/sp_fence.c index 66c5214113..7b79a0df4e 100644 --- a/src/gallium/drivers/softpipe/sp_fence.c +++ b/src/gallium/drivers/softpipe/sp_fence.c @@ -41,23 +41,22 @@ softpipe_fence_reference(struct pipe_screen *screen, } -static int +static boolean softpipe_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { assert(!fence); - return 0; + return TRUE; } -static int +static boolean softpipe_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { assert(!fence); - return 0; + return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_flush.c 
b/src/gallium/drivers/softpipe/sp_flush.c index 3a09ee8b4f..b7f283bf27 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -42,7 +42,7 @@ void softpipe_flush( struct pipe_context *pipe, - unsigned flags, + unsigned flags, struct pipe_fence_handle **fence ) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -50,9 +50,9 @@ softpipe_flush( struct pipe_context *pipe, draw_flush(softpipe->draw); - if (1 || (flags & PIPE_FLUSH_TEXTURE_CACHE)) { - for (i = 0; i < softpipe->num_sampler_views; i++) { - sp_flush_tex_tile_cache(softpipe->tex_cache[i]); + if (1 || (flags & SP_FLUSH_TEXTURE_CACHE)) { + for (i = 0; i < softpipe->num_fragment_sampler_views; i++) { + sp_flush_tex_tile_cache(softpipe->fragment_tex_cache[i]); } for (i = 0; i < softpipe->num_vertex_sampler_views; i++) { sp_flush_tex_tile_cache(softpipe->vertex_tex_cache[i]); @@ -62,34 +62,27 @@ softpipe_flush( struct pipe_context *pipe, } } - if (flags & PIPE_FLUSH_SWAPBUFFERS) { - /* If this is a swapbuffers, just flush color buffers. - * - * The zbuffer changes are not discarded, but held in the cache - * in the hope that a later clear will wipe them out. - */ - for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) - if (softpipe->cbuf_cache[i]) - sp_flush_tile_cache(softpipe->cbuf_cache[i]); - - /* Need this call for hardware buffers before swapbuffers. - * - * there should probably be another/different flush-type function - * that's called before swapbuffers because we don't always want - * to unmap surfaces when flushing. - */ - softpipe_unmap_transfers(softpipe); - } - else if (flags & PIPE_FLUSH_RENDER_CACHE) { - for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) - if (softpipe->cbuf_cache[i]) - sp_flush_tile_cache(softpipe->cbuf_cache[i]); - - if (softpipe->zsbuf_cache) - sp_flush_tile_cache(softpipe->zsbuf_cache); - - softpipe->dirty_render_cache = FALSE; - } + /* If this is a swapbuffers, just flush color buffers. 
+ * + * The zbuffer changes are not discarded, but held in the cache + * in the hope that a later clear will wipe them out. + */ + for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) + if (softpipe->cbuf_cache[i]) + sp_flush_tile_cache(softpipe->cbuf_cache[i]); + + if (softpipe->zsbuf_cache) + sp_flush_tile_cache(softpipe->zsbuf_cache); + + softpipe->dirty_render_cache = FALSE; + + /* Need this call for hardware buffers before swapbuffers. + * + * there should probably be another/different flush-type function + * that's called before swapbuffers because we don't always want + * to unmap surfaces when flushing. + */ + softpipe_unmap_transfers(softpipe); /* Enable to dump BMPs of the color/depth buffers each frame */ #if 0 @@ -108,6 +101,13 @@ softpipe_flush( struct pipe_context *pipe, *fence = NULL; } +void +softpipe_flush_wrapped( struct pipe_context *pipe, + struct pipe_fence_handle **fence ) +{ + softpipe_flush(pipe, SP_FLUSH_TEXTURE_CACHE, fence); +} + /** * Flush context if necessary. @@ -129,21 +129,18 @@ softpipe_flush_resource(struct pipe_context *pipe, { unsigned referenced; - referenced = pipe->is_resource_referenced(pipe, texture, level, layer); + referenced = softpipe_is_resource_referenced(pipe, texture, level, layer); - if ((referenced & PIPE_REFERENCED_FOR_WRITE) || - ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { + if ((referenced & SP_REFERENCED_FOR_WRITE) || + ((referenced & SP_REFERENCED_FOR_READ) && !read_only)) { /* * TODO: The semantics of these flush flags are too obtuse. They should * disappear and the pipe driver should just ensure that all visible * side-effects happen when they need to happen. 
*/ - if (referenced & PIPE_REFERENCED_FOR_WRITE) - flush_flags |= PIPE_FLUSH_RENDER_CACHE; - - if (referenced & PIPE_REFERENCED_FOR_READ) - flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + if (referenced & SP_REFERENCED_FOR_READ) + flush_flags |= SP_FLUSH_TEXTURE_CACHE; if (cpu_access) { /* @@ -155,14 +152,15 @@ softpipe_flush_resource(struct pipe_context *pipe, if (do_not_block) return FALSE; - pipe->flush(pipe, flush_flags, &fence); + softpipe_flush(pipe, flush_flags, &fence); if (fence) { /* * This is for illustrative purposes only, as softpipe does not * have fences. */ - pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_finish(pipe->screen, fence, + PIPE_TIMEOUT_INFINITE); pipe->screen->fence_reference(pipe->screen, &fence, NULL); } } else { @@ -170,7 +168,7 @@ softpipe_flush_resource(struct pipe_context *pipe, * Just flush. */ - pipe->flush(pipe, flush_flags, NULL); + softpipe_flush(pipe, flush_flags, NULL); } } diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h index 22a5ceeb9e..ab01c249ab 100644 --- a/src/gallium/drivers/softpipe/sp_flush.h +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -33,10 +33,17 @@ struct pipe_context; struct pipe_fence_handle; +#define SP_FLUSH_TEXTURE_CACHE 0x2 + void -softpipe_flush(struct pipe_context *pipe, unsigned flags, +softpipe_flush(struct pipe_context *pipe, + unsigned flags, struct pipe_fence_handle **fence); +void +softpipe_flush_wrapped( struct pipe_context *pipe, + struct pipe_fence_handle **fence ); + boolean softpipe_flush_resource(struct pipe_context *pipe, struct pipe_resource *texture, diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 6af1b2d061..76cfc0bf51 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -35,6 +35,7 @@ #include "util/u_memory.h" #include "util/u_format.h" #include "sp_context.h" +#include "sp_state.h" #include 
"sp_quad.h" #include "sp_tile_cache.h" #include "sp_quad_pipe.h" @@ -794,6 +795,9 @@ blend_fallback(struct quad_stage *qs, struct softpipe_context *softpipe = qs->softpipe; const struct pipe_blend_state *blend = softpipe->blend; unsigned cbuf; + boolean write_all; + + write_all = softpipe->fs->color0_writes_all_cbufs; for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { @@ -806,15 +810,19 @@ blend_fallback(struct quad_stage *qs, quads[0]->input.y0); boolean has_dst_alpha = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format); - uint q, i, j; + uint q, i, j, qbuf; + + qbuf = write_all ? 0 : cbuf; for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; - float (*quadColor)[4] = quad->output.color[cbuf]; + float (*quadColor)[4]; const int itx = (quad->input.x0 & (TILE_SIZE-1)); const int ity = (quad->input.y0 & (TILE_SIZE-1)); - /* get/swizzle dest colors + quadColor = quad->output.color[qbuf]; + + /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { int x = itx + (j & 1); diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index cbdea19af4..48aabc18da 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -123,6 +123,11 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 0; case PIPE_CAP_SHADER_STENCIL_EXPORT: return 1; + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + return 1; + case PIPE_CAP_ARRAY_TEXTURES: + return 1; default: return 0; } @@ -175,15 +180,16 @@ softpipe_is_format_supported( struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned bind, - unsigned geom_flags ) + unsigned bind) { struct sw_winsys *winsys = softpipe_screen(screen)->winsys; const struct util_format_description *format_desc; assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D || + target == PIPE_TEXTURE_1D_ARRAY || target == 
PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_RECT || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 5d727dc00d..0ce28f4c6e 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -575,7 +575,7 @@ setup_fragcoord_coeff(struct setup_context *setup, uint slot) setup->coef[slot].dady[0] = 0.0; /*Y*/ setup->coef[slot].a0[1] = - (spfs->origin_lower_left ? setup->softpipe->framebuffer.height : 0) + (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) + (spfs->pixel_center_integer ? 0.0 : 0.5); setup->coef[slot].dadx[1] = 0.0; setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 525bf23734..bb19f8cff2 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -74,7 +74,7 @@ struct sp_fragment_shader { boolean origin_lower_left; /**< fragment shader uses lower left position origin? */ boolean pixel_center_integer; /**< fragment shader uses integer pixel center? 
*/ - + boolean color0_writes_all_cbufs; /**< fragment shader writes color0 to all bound cbufs */ void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers); diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 3ba4d934fd..f9590eb0b2 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -197,11 +197,11 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) { unsigned i; - softpipe_reset_sampler_varients( softpipe ); + softpipe_reset_sampler_variants( softpipe ); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - struct softpipe_tex_tile_cache *tc = softpipe->tex_cache[i]; - if (tc->texture) { + struct softpipe_tex_tile_cache *tc = softpipe->fragment_tex_cache[i]; + if (tc && tc->texture) { struct softpipe_resource *spt = softpipe_resource(tc->texture); if (spt->timestamp != tc->timestamp) { sp_tex_tile_cache_validate_texture( tc ); @@ -216,7 +216,7 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { struct softpipe_tex_tile_cache *tc = softpipe->vertex_tex_cache[i]; - if (tc->texture) { + if (tc && tc->texture) { struct softpipe_resource *spt = softpipe_resource(tc->texture); if (spt->timestamp != tc->timestamp) { @@ -229,7 +229,7 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { struct softpipe_tex_tile_cache *tc = softpipe->geometry_tex_cache[i]; - if (tc->texture) { + if (tc && tc->texture) { struct softpipe_resource *spt = softpipe_resource(tc->texture); if (spt->timestamp != tc->timestamp) { diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index b59fbc33ed..60331bc497 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -43,8 
+43,8 @@ struct sp_sampler { struct pipe_sampler_state base; - struct sp_sampler_varient *varients; - struct sp_sampler_varient *current; + struct sp_sampler_variant *variants; + struct sp_sampler_variant *current; }; static struct sp_sampler *sp_sampler( struct pipe_sampler_state *sampler ) @@ -60,15 +60,15 @@ softpipe_create_sampler_state(struct pipe_context *pipe, struct sp_sampler *sp_sampler = CALLOC_STRUCT(sp_sampler); sp_sampler->base = *sampler; - sp_sampler->varients = NULL; + sp_sampler->variants = NULL; return (void *)sp_sampler; } static void -softpipe_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) +softpipe_bind_fragment_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct softpipe_context *softpipe = softpipe_context(pipe); unsigned i; @@ -76,18 +76,18 @@ softpipe_bind_sampler_states(struct pipe_context *pipe, assert(num <= PIPE_MAX_SAMPLERS); /* Check for no-op */ - if (num == softpipe->num_samplers && - !memcmp(softpipe->sampler, sampler, num * sizeof(void *))) + if (num == softpipe->num_fragment_samplers && + !memcmp(softpipe->fragment_samplers, sampler, num * sizeof(void *))) return; draw_flush(softpipe->draw); for (i = 0; i < num; ++i) - softpipe->sampler[i] = sampler[i]; + softpipe->fragment_samplers[i] = sampler[i]; for (i = num; i < PIPE_MAX_SAMPLERS; ++i) - softpipe->sampler[i] = NULL; + softpipe->fragment_samplers[i] = NULL; - softpipe->num_samplers = num; + softpipe->num_fragment_samplers = num; softpipe->dirty |= SP_NEW_SAMPLER; } @@ -181,9 +181,9 @@ softpipe_sampler_view_destroy(struct pipe_context *pipe, static void -softpipe_set_sampler_views(struct pipe_context *pipe, - unsigned num, - struct pipe_sampler_view **views) +softpipe_set_fragment_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) { struct softpipe_context *softpipe = softpipe_context(pipe); uint i; @@ -191,8 +191,9 @@ softpipe_set_sampler_views(struct pipe_context *pipe, 
assert(num <= PIPE_MAX_SAMPLERS); /* Check for no-op */ - if (num == softpipe->num_sampler_views && - !memcmp(softpipe->sampler_views, views, num * sizeof(struct pipe_sampler_view *))) + if (num == softpipe->num_fragment_sampler_views && + !memcmp(softpipe->fragment_sampler_views, views, + num * sizeof(struct pipe_sampler_view *))) return; draw_flush(softpipe->draw); @@ -200,11 +201,11 @@ softpipe_set_sampler_views(struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { struct pipe_sampler_view *view = i < num ? views[i] : NULL; - pipe_sampler_view_reference(&softpipe->sampler_views[i], view); - sp_tex_tile_cache_set_sampler_view(softpipe->tex_cache[i], view); + pipe_sampler_view_reference(&softpipe->fragment_sampler_views[i], view); + sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[i], view); } - softpipe->num_sampler_views = num; + softpipe->num_fragment_sampler_views = num; softpipe->dirty |= SP_NEW_TEXTURE; } @@ -277,23 +278,23 @@ softpipe_set_geometry_sampler_views(struct pipe_context *pipe, /** - * Find/create an sp_sampler_varient object for sampling the given texture, + * Find/create an sp_sampler_variant object for sampling the given texture, * sampler and tex unit. * * Note that the tex unit is significant. We can't re-use a sampler - * varient for multiple texture units because the sampler varient contains + * variant for multiple texture units because the sampler variant contains * the texture object pointer. If the texture object pointer were stored - * somewhere outside the sampler varient, we could re-use samplers for + * somewhere outside the sampler variant, we could re-use samplers for * multiple texture units. 
*/ -static struct sp_sampler_varient * -get_sampler_varient( unsigned unit, +static struct sp_sampler_variant * +get_sampler_variant( unsigned unit, struct sp_sampler *sampler, - struct pipe_resource *resource, + struct pipe_sampler_view *view, unsigned processor ) { - struct softpipe_resource *sp_texture = softpipe_resource(resource); - struct sp_sampler_varient *v = NULL; + struct softpipe_resource *sp_texture = softpipe_resource(view->texture); + struct sp_sampler_variant *v = NULL; union sp_sampler_key key; /* if this fails, widen the key.unit field and update this assertion */ @@ -303,6 +304,10 @@ get_sampler_varient( unsigned unit, key.bits.is_pot = sp_texture->pot; key.bits.processor = processor; key.bits.unit = unit; + key.bits.swizzle_r = view->swizzle_r; + key.bits.swizzle_g = view->swizzle_g; + key.bits.swizzle_b = view->swizzle_b; + key.bits.swizzle_a = view->swizzle_a; key.bits.pad = 0; if (sampler->current && @@ -311,14 +316,14 @@ get_sampler_varient( unsigned unit, } if (v == NULL) { - for (v = sampler->varients; v; v = v->next) + for (v = sampler->variants; v; v = v->next) if (v->key.value == key.value) break; if (v == NULL) { - v = sp_create_sampler_varient( &sampler->base, key ); - v->next = sampler->varients; - sampler->varients = v; + v = sp_create_sampler_variant( &sampler->base, key ); + v->next = sampler->variants; + sampler->variants = v; } } @@ -328,7 +333,7 @@ get_sampler_varient( unsigned unit, void -softpipe_reset_sampler_varients(struct softpipe_context *softpipe) +softpipe_reset_sampler_variants(struct softpipe_context *softpipe) { int i; @@ -338,65 +343,47 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) */ for (i = 0; i <= softpipe->vs->max_sampler; i++) { if (softpipe->vertex_samplers[i]) { - struct pipe_resource *texture = NULL; - - if (softpipe->vertex_sampler_views[i]) { - texture = softpipe->vertex_sampler_views[i]->texture; - } - softpipe->tgsi.vert_samplers_list[i] = - get_sampler_varient( i, + 
get_sampler_variant( i, sp_sampler(softpipe->vertex_samplers[i]), - texture, + softpipe->vertex_sampler_views[i], TGSI_PROCESSOR_VERTEX ); - sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i], - softpipe->vertex_tex_cache[i], - texture ); + sp_sampler_variant_bind_view( softpipe->tgsi.vert_samplers_list[i], + softpipe->vertex_tex_cache[i], + softpipe->vertex_sampler_views[i] ); } } if (softpipe->gs) { for (i = 0; i <= softpipe->gs->max_sampler; i++) { if (softpipe->geometry_samplers[i]) { - struct pipe_resource *texture = NULL; - - if (softpipe->geometry_sampler_views[i]) { - texture = softpipe->geometry_sampler_views[i]->texture; - } - softpipe->tgsi.geom_samplers_list[i] = - get_sampler_varient( + get_sampler_variant( i, sp_sampler(softpipe->geometry_samplers[i]), - texture, + softpipe->geometry_sampler_views[i], TGSI_PROCESSOR_GEOMETRY ); - sp_sampler_varient_bind_texture( + sp_sampler_variant_bind_view( softpipe->tgsi.geom_samplers_list[i], softpipe->geometry_tex_cache[i], - texture ); + softpipe->geometry_sampler_views[i] ); } } } for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { - if (softpipe->sampler[i]) { - struct pipe_resource *texture = NULL; - - if (softpipe->sampler_views[i]) { - texture = softpipe->sampler_views[i]->texture; - } - + if (softpipe->fragment_samplers[i]) { softpipe->tgsi.frag_samplers_list[i] = - get_sampler_varient( i, - sp_sampler(softpipe->sampler[i]), - texture, + get_sampler_variant( i, + sp_sampler(softpipe->fragment_samplers[i]), + softpipe->fragment_sampler_views[i], TGSI_PROCESSOR_FRAGMENT ); - sp_sampler_varient_bind_texture( softpipe->tgsi.frag_samplers_list[i], - softpipe->tex_cache[i], - texture ); + sp_sampler_variant_bind_view( softpipe->tgsi.frag_samplers_list[i], + softpipe->fragment_tex_cache[i], + softpipe->fragment_sampler_views[i] ); } } } @@ -406,11 +393,11 @@ softpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { struct sp_sampler *sp_sampler = (struct 
sp_sampler *)sampler; - struct sp_sampler_varient *v, *tmp; + struct sp_sampler_variant *v, *tmp; - for (v = sp_sampler->varients; v; v = tmp) { + for (v = sp_sampler->variants; v; v = tmp) { tmp = v->next; - sp_sampler_varient_destroy(v); + sp_sampler_variant_destroy(v); } FREE( sampler ); @@ -421,12 +408,12 @@ void softpipe_init_sampler_funcs(struct pipe_context *pipe) { pipe->create_sampler_state = softpipe_create_sampler_state; - pipe->bind_fragment_sampler_states = softpipe_bind_sampler_states; + pipe->bind_fragment_sampler_states = softpipe_bind_fragment_sampler_states; pipe->bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states; pipe->bind_geometry_sampler_states = softpipe_bind_geometry_sampler_states; pipe->delete_sampler_state = softpipe_delete_sampler_state; - pipe->set_fragment_sampler_views = softpipe_set_sampler_views; + pipe->set_fragment_sampler_views = softpipe_set_fragment_sampler_views; pipe->set_vertex_sampler_views = softpipe_set_vertex_sampler_views; pipe->set_geometry_sampler_views = softpipe_set_geometry_sampler_views; diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c index 7fff338cce..3dec5de3cc 100644 --- a/src/gallium/drivers/softpipe/sp_state_shader.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -78,6 +78,8 @@ softpipe_create_fs_state(struct pipe_context *pipe, state->origin_lower_left = state->info.properties[i].data[0]; else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER) state->pixel_center_integer = state->info.properties[i].data[0]; + else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) + state->color0_writes_all_cbufs = state->info.properties[i].data[0]; } return state; @@ -89,8 +91,6 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); - draw_flush(softpipe->draw); - if (softpipe->fs == fs) return; diff --git 
a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 7d8055f2ba..aa0b333c7a 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -33,6 +33,8 @@ #include "sp_state.h" #include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" #include "draw/draw_context.h" @@ -84,8 +86,9 @@ softpipe_set_vertex_buffers(struct pipe_context *pipe, assert(count <= PIPE_MAX_ATTRIBS); - memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0])); - softpipe->num_vertex_buffers = count; + util_copy_vertex_buffers(softpipe->vertex_buffer, + &softpipe->num_vertex_buffers, + buffers, count); softpipe->dirty |= SP_NEW_VERTEX; @@ -117,4 +120,5 @@ softpipe_init_vertex_funcs(struct pipe_context *pipe) pipe->set_vertex_buffers = softpipe_set_vertex_buffers; pipe->set_index_buffer = softpipe_set_index_buffer; + pipe->redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 2eac4c7a82..c09ce19559 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -539,18 +539,31 @@ wrap_linear_unorm_clamp_to_edge(const float s[4], unsigned size, } +/** + * Do coordinate to array index conversion. For array textures. + */ +static INLINE void +wrap_array_layer(const float coord[4], unsigned size, int layer[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + int c = util_ifloor(coord[ch] + 0.5F); + layer[ch] = CLAMP(c, 0, size - 1); + } +} + /** * Examine the quad's texture coordinates to compute the partial * derivatives w.r.t X and Y, then compute lambda (level of detail). 
*/ static float -compute_lambda_1d(const struct sp_sampler_varient *samp, +compute_lambda_1d(const struct sp_sampler_variant *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE]) { - const struct pipe_resource *texture = samp->texture; + const struct pipe_resource *texture = samp->view->texture; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float rho = MAX2(dsdx, dsdy) * texture->width0; @@ -560,12 +573,12 @@ compute_lambda_1d(const struct sp_sampler_varient *samp, static float -compute_lambda_2d(const struct sp_sampler_varient *samp, +compute_lambda_2d(const struct sp_sampler_variant *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE]) { - const struct pipe_resource *texture = samp->texture; + const struct pipe_resource *texture = samp->view->texture; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); @@ -579,12 +592,12 @@ compute_lambda_2d(const struct sp_sampler_varient *samp, static float -compute_lambda_3d(const struct sp_sampler_varient *samp, +compute_lambda_3d(const struct sp_sampler_variant *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE]) { - const struct pipe_resource *texture = samp->texture; + const struct pipe_resource *texture = samp->view->texture; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); @@ -608,7 +621,7 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, * Since there aren't derivatives to use, just return 0. 
*/ static float -compute_lambda_vert(const struct sp_sampler_varient *samp, +compute_lambda_vert(const struct sp_sampler_variant *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE]) @@ -634,7 +647,7 @@ compute_lambda_vert(const struct sp_sampler_varient *samp, static INLINE const float * -get_texel_2d_no_border(const struct sp_sampler_varient *samp, +get_texel_2d_no_border(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x, int y) { const struct softpipe_tex_cached_tile *tile; @@ -651,16 +664,15 @@ get_texel_2d_no_border(const struct sp_sampler_varient *samp, static INLINE const float * -get_texel_2d(const struct sp_sampler_varient *samp, +get_texel_2d(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x, int y) { - const struct pipe_resource *texture = samp->texture; + const struct pipe_resource *texture = samp->view->texture; unsigned level = addr.bits.level; if (x < 0 || x >= (int) u_minify(texture->width0, level) || y < 0 || y >= (int) u_minify(texture->height0, level)) { - return sp_tex_tile_cache_border_color(samp->cache, - samp->sampler->border_color); + return samp->sampler->border_color; } else { return get_texel_2d_no_border( samp, addr, x, y ); @@ -671,7 +683,7 @@ get_texel_2d(const struct sp_sampler_varient *samp, /* Gather a quad of adjacent texels within a tile: */ static INLINE void -get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp, +get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant *samp, union tex_tile_address addr, unsigned x, unsigned y, const float *out[4]) @@ -695,7 +707,7 @@ get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp, /* Gather a quad of potentially non-adjacent texels: */ static INLINE void -get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp, +get_texel_quad_2d_no_border(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x0, int y0, int x1, int y1, 
@@ -710,7 +722,7 @@ get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp, /* Can involve a lot of unnecessary checks for border color: */ static INLINE void -get_texel_quad_2d(const struct sp_sampler_varient *samp, +get_texel_quad_2d(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x0, int y0, int x1, int y1, @@ -724,10 +736,10 @@ get_texel_quad_2d(const struct sp_sampler_varient *samp, -/* 3d varients: +/* 3d variants: */ static INLINE const float * -get_texel_3d_no_border(const struct sp_sampler_varient *samp, +get_texel_3d_no_border(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x, int y, int z) { const struct softpipe_tex_cached_tile *tile; @@ -745,17 +757,16 @@ get_texel_3d_no_border(const struct sp_sampler_varient *samp, static INLINE const float * -get_texel_3d(const struct sp_sampler_varient *samp, +get_texel_3d(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x, int y, int z) { - const struct pipe_resource *texture = samp->texture; + const struct pipe_resource *texture = samp->view->texture; unsigned level = addr.bits.level; if (x < 0 || x >= (int) u_minify(texture->width0, level) || y < 0 || y >= (int) u_minify(texture->height0, level) || z < 0 || z >= (int) u_minify(texture->depth0, level)) { - return sp_tex_tile_cache_border_color(samp->cache, - samp->sampler->border_color); + return samp->sampler->border_color; } else { return get_texel_3d_no_border( samp, addr, x, y, z ); @@ -763,6 +774,43 @@ get_texel_3d(const struct sp_sampler_varient *samp, } +/* Get texel pointer for 1D array texture */ +static INLINE const float * +get_texel_1d_array(const struct sp_sampler_variant *samp, + union tex_tile_address addr, int x, int y) +{ + const struct pipe_resource *texture = samp->view->texture; + unsigned level = addr.bits.level; + + if (x < 0 || x >= (int) u_minify(texture->width0, level)) { + return samp->sampler->border_color; + } + else { + return get_texel_2d_no_border(samp, 
addr, x, y); + } +} + + +/* Get texel pointer for 2D array texture */ +static INLINE const float * +get_texel_2d_array(const struct sp_sampler_variant *samp, + union tex_tile_address addr, int x, int y, int layer) +{ + const struct pipe_resource *texture = samp->view->texture; + unsigned level = addr.bits.level; + + assert(layer < texture->array_size); + + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level)) { + return samp->sampler->border_color; + } + else { + return get_texel_3d_no_border(samp, addr, x, y, layer); + } +} + + /** * Given the logbase2 of a mipmap's base level size and a mipmap level, * return the size (in texels) of that mipmap level. @@ -800,7 +848,7 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); unsigned j; unsigned level = samp->level; unsigned xpot = pot_level_size(samp->xpot, level); @@ -863,7 +911,7 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); unsigned j; unsigned level = samp->level; unsigned xpot = pot_level_size(samp->xpot, level); @@ -907,7 +955,7 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); unsigned j; unsigned level = samp->level; unsigned xpot = pot_level_size(samp->xpot, level); @@ -960,8 +1008,8 @@ img_filter_1d_nearest(struct 
tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct pipe_resource *texture = samp->texture; + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; unsigned level0, j; int width; int x[4]; @@ -992,6 +1040,47 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, static void +img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width; + int x[4], layer[4]; + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + + assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->nearest_texcoord_s(s, width, x); + wrap_array_layer(t, texture->array_size, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_1d_array(samp, addr, x[j], layer[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } + } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } +} + + +static void img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], @@ -1000,8 +1089,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct pipe_resource *texture = samp->texture; + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = 
samp->view->texture; unsigned level0, j; int width, height; int x[4], y[4]; @@ -1035,6 +1124,50 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, } +static void +img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width, height; + int x[4], y[4], layer[4]; + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + + assert(width > 0); + assert(height > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->nearest_texcoord_s(s, width, x); + samp->nearest_texcoord_t(t, height, y); + wrap_array_layer(p, texture->array_size, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_2d_array(samp, addr, x[j], y[j], layer[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } + } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } +} + + static INLINE union tex_tile_address face(union tex_tile_address addr, unsigned face ) { @@ -1052,8 +1185,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct pipe_resource *texture = samp->texture; + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; const unsigned *faces = samp->faces; /* zero when not cube-mapping */ unsigned level0, j; int width, height; @@ -1096,8 +1229,8 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control 
control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct pipe_resource *texture = samp->texture; + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; unsigned level0, j; int width, height, depth; int x[4], y[4], z[4]; @@ -1138,8 +1271,8 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct pipe_resource *texture = samp->texture; + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; unsigned level0, j; int width; int x0[4], x1[4]; @@ -1170,6 +1303,47 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, static void +img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width; + int x0[4], x1[4], layer[4]; + float xw[4]; /* weights */ + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + + assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->linear_texcoord_s(s, width, x0, x1, xw); + wrap_array_layer(t, texture->array_size, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *tx0 = get_texel_1d_array(samp, addr, x0[j], layer[j]); + const float *tx1 = get_texel_1d_array(samp, addr, x1[j], layer[j]); + int c; + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp(xw[j], tx0[c], 
tx1[c]); + } + } +} + + +static void img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], @@ -1178,8 +1352,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct pipe_resource *texture = samp->texture; + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; unsigned level0, j; int width, height; int x0[4], y0[4], x1[4], y1[4]; @@ -1217,6 +1391,54 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, static void +img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width, height; + int x0[4], y0[4], x1[4], y1[4], layer[4]; + float xw[4], yw[4]; /* weights */ + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + + assert(width > 0); + assert(height > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->linear_texcoord_s(s, width, x0, x1, xw); + samp->linear_texcoord_t(t, height, y0, y1, yw); + wrap_array_layer(p, texture->array_size, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *tx0 = get_texel_2d_array(samp, addr, x0[j], y0[j], layer[j]); + const float *tx1 = get_texel_2d_array(samp, addr, x1[j], y0[j], layer[j]); + const float *tx2 = get_texel_2d_array(samp, addr, x0[j], y1[j], layer[j]); + const float *tx3 = get_texel_2d_array(samp, addr, x1[j], y1[j], layer[j]); + int c; + + 
/* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx0[c], tx1[c], + tx2[c], tx3[c]); + } + } +} + + +static void img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], @@ -1225,8 +1447,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct pipe_resource *texture = samp->texture; + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; const unsigned *faces = samp->faces; /* zero when not cube-mapping */ unsigned level0, j; int width, height; @@ -1274,8 +1496,8 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct pipe_resource *texture = samp->texture; + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; unsigned level0, j; int width, height, depth; int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; @@ -1350,8 +1572,8 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct pipe_resource *texture = samp->texture; + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; int level0; float lambda; float lod[QUAD_SIZE]; @@ -1417,8 +1639,8 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct 
pipe_resource *texture = samp->texture; + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; float lambda; float lod[QUAD_SIZE]; @@ -1460,7 +1682,7 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); float lambda; float lod[QUAD_SIZE]; @@ -1501,8 +1723,8 @@ mip_filter_linear_2d_linear_repeat_POT( enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - const struct pipe_resource *texture = samp->texture; + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; int level0; float lambda; float lod[QUAD_SIZE]; @@ -1569,10 +1791,11 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_sampler_state *sampler = samp->sampler; int j, k0, k1, k2, k3; float val; + float pc0, pc1, pc2, pc3; samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba); @@ -1582,43 +1805,48 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, * RGBA channels. We look at the red channel here. */ + pc0 = CLAMP(p[0], 0.0F, 1.0F); + pc1 = CLAMP(p[1], 0.0F, 1.0F); + pc2 = CLAMP(p[2], 0.0F, 1.0F); + pc3 = CLAMP(p[3], 0.0F, 1.0F); + /* compare four texcoords vs. 
four texture samples */ switch (sampler->compare_func) { case PIPE_FUNC_LESS: - k0 = p[0] < rgba[0][0]; - k1 = p[1] < rgba[0][1]; - k2 = p[2] < rgba[0][2]; - k3 = p[3] < rgba[0][3]; + k0 = pc0 < rgba[0][0]; + k1 = pc1 < rgba[0][1]; + k2 = pc2 < rgba[0][2]; + k3 = pc3 < rgba[0][3]; break; case PIPE_FUNC_LEQUAL: - k0 = p[0] <= rgba[0][0]; - k1 = p[1] <= rgba[0][1]; - k2 = p[2] <= rgba[0][2]; - k3 = p[3] <= rgba[0][3]; + k0 = pc0 <= rgba[0][0]; + k1 = pc1 <= rgba[0][1]; + k2 = pc2 <= rgba[0][2]; + k3 = pc3 <= rgba[0][3]; break; case PIPE_FUNC_GREATER: - k0 = p[0] > rgba[0][0]; - k1 = p[1] > rgba[0][1]; - k2 = p[2] > rgba[0][2]; - k3 = p[3] > rgba[0][3]; + k0 = pc0 > rgba[0][0]; + k1 = pc1 > rgba[0][1]; + k2 = pc2 > rgba[0][2]; + k3 = pc3 > rgba[0][3]; break; case PIPE_FUNC_GEQUAL: - k0 = p[0] >= rgba[0][0]; - k1 = p[1] >= rgba[0][1]; - k2 = p[2] >= rgba[0][2]; - k3 = p[3] >= rgba[0][3]; + k0 = pc0 >= rgba[0][0]; + k1 = pc1 >= rgba[0][1]; + k2 = pc2 >= rgba[0][2]; + k3 = pc3 >= rgba[0][3]; break; case PIPE_FUNC_EQUAL: - k0 = p[0] == rgba[0][0]; - k1 = p[1] == rgba[0][1]; - k2 = p[2] == rgba[0][2]; - k3 = p[3] == rgba[0][3]; + k0 = pc0 == rgba[0][0]; + k1 = pc1 == rgba[0][1]; + k2 = pc2 == rgba[0][2]; + k3 = pc3 == rgba[0][3]; break; case PIPE_FUNC_NOTEQUAL: - k0 = p[0] != rgba[0][0]; - k1 = p[1] != rgba[0][1]; - k2 = p[2] != rgba[0][2]; - k3 = p[3] != rgba[0][3]; + k0 = pc0 != rgba[0][0]; + k1 = pc1 != rgba[0][1]; + k2 = pc2 != rgba[0][2]; + k3 = pc3 != rgba[0][3]; break; case PIPE_FUNC_ALWAYS: k0 = k1 = k2 = k3 = 1; @@ -1656,7 +1884,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); unsigned j; float ssss[4], tttt[4]; @@ -1731,6 +1959,86 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, } +static void +sample_swizzle(struct tgsi_sampler *tgsi_sampler, + 
const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + float rgba_temp[NUM_CHANNELS][QUAD_SIZE]; + const unsigned swizzle_r = samp->key.bits.swizzle_r; + const unsigned swizzle_g = samp->key.bits.swizzle_g; + const unsigned swizzle_b = samp->key.bits.swizzle_b; + const unsigned swizzle_a = samp->key.bits.swizzle_a; + unsigned j; + + samp->sample_target(tgsi_sampler, s, t, p, c0, control, rgba_temp); + + switch (swizzle_r) { + case PIPE_SWIZZLE_ZERO: + for (j = 0; j < 4; j++) + rgba[0][j] = 0.0f; + break; + case PIPE_SWIZZLE_ONE: + for (j = 0; j < 4; j++) + rgba[0][j] = 1.0f; + break; + default: + assert(swizzle_r < 4); + for (j = 0; j < 4; j++) + rgba[0][j] = rgba_temp[swizzle_r][j]; + } + + switch (swizzle_g) { + case PIPE_SWIZZLE_ZERO: + for (j = 0; j < 4; j++) + rgba[1][j] = 0.0f; + break; + case PIPE_SWIZZLE_ONE: + for (j = 0; j < 4; j++) + rgba[1][j] = 1.0f; + break; + default: + assert(swizzle_g < 4); + for (j = 0; j < 4; j++) + rgba[1][j] = rgba_temp[swizzle_g][j]; + } + + switch (swizzle_b) { + case PIPE_SWIZZLE_ZERO: + for (j = 0; j < 4; j++) + rgba[2][j] = 0.0f; + break; + case PIPE_SWIZZLE_ONE: + for (j = 0; j < 4; j++) + rgba[2][j] = 1.0f; + break; + default: + assert(swizzle_b < 4); + for (j = 0; j < 4; j++) + rgba[2][j] = rgba_temp[swizzle_b][j]; + } + + switch (swizzle_a) { + case PIPE_SWIZZLE_ZERO: + for (j = 0; j < 4; j++) + rgba[3][j] = 0.0f; + break; + case PIPE_SWIZZLE_ONE: + for (j = 0; j < 4; j++) + rgba[3][j] = 1.0f; + break; + default: + assert(swizzle_a < 4); + for (j = 0; j < 4; j++) + rgba[3][j] = rgba_temp[swizzle_a][j]; + } +} + static wrap_nearest_func get_nearest_unorm_wrap(unsigned mode) @@ -1828,8 +2136,10 @@ get_lambda_func(const union sp_sampler_key key) switch (key.bits.target) { case PIPE_TEXTURE_1D: + case 
PIPE_TEXTURE_1D_ARRAY: return compute_lambda_1d; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_CUBE: return compute_lambda_2d; @@ -1854,6 +2164,12 @@ get_img_filter(const union sp_sampler_key key, else return img_filter_1d_linear; break; + case PIPE_TEXTURE_1D_ARRAY: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_1d_array_nearest; + else + return img_filter_1d_array_linear; + break; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: /* Try for fast path: @@ -1889,6 +2205,12 @@ get_img_filter(const union sp_sampler_key key, else return img_filter_2d_linear; break; + case PIPE_TEXTURE_2D_ARRAY: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_2d_array_nearest; + else + return img_filter_2d_array_linear; + break; case PIPE_TEXTURE_CUBE: if (filter == PIPE_TEX_FILTER_NEAREST) return img_filter_cube_nearest; @@ -1909,16 +2231,17 @@ get_img_filter(const union sp_sampler_key key, /** - * Bind the given texture object and texture cache to the sampler varient. + * Bind the given texture object and texture cache to the sampler variant. 
*/ void -sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, - struct softpipe_tex_tile_cache *tex_cache, - const struct pipe_resource *texture ) +sp_sampler_variant_bind_view( struct sp_sampler_variant *samp, + struct softpipe_tex_tile_cache *tex_cache, + const struct pipe_sampler_view *view ) { const struct pipe_sampler_state *sampler = samp->sampler; + const struct pipe_resource *texture = view->texture; - samp->texture = texture; + samp->view = view; samp->cache = tex_cache; samp->xpot = util_unsigned_logbase2( texture->width0 ); samp->ypot = util_unsigned_logbase2( texture->height0 ); @@ -1927,20 +2250,20 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, void -sp_sampler_varient_destroy( struct sp_sampler_varient *samp ) +sp_sampler_variant_destroy( struct sp_sampler_variant *samp ) { FREE(samp); } /** - * Create a sampler varient for a given set of non-orthogonal state. + * Create a sampler variant for a given set of non-orthogonal state. */ -struct sp_sampler_varient * -sp_create_sampler_varient( const struct pipe_sampler_state *sampler, +struct sp_sampler_variant * +sp_create_sampler_variant( const struct pipe_sampler_state *sampler, const union sp_sampler_key key ) { - struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient); + struct sp_sampler_variant *samp = CALLOC_STRUCT(sp_sampler_variant); if (!samp) return NULL; @@ -2015,7 +2338,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, } if (key.bits.target == PIPE_TEXTURE_CUBE) { - samp->base.get_samples = sample_cube; + samp->sample_target = sample_cube; } else { samp->faces[0] = 0; @@ -2026,7 +2349,17 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, /* Skip cube face determination by promoting the compare * function pointer: */ - samp->base.get_samples = samp->compare; + samp->sample_target = samp->compare; + } + + if (key.bits.swizzle_r != PIPE_SWIZZLE_RED || + key.bits.swizzle_g != PIPE_SWIZZLE_GREEN || + 
key.bits.swizzle_b != PIPE_SWIZZLE_BLUE || + key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA) { + samp->base.get_samples = sample_swizzle; + } + else { + samp->base.get_samples = samp->sample_target; } return samp; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 6114acf737..f0b867edc6 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -32,7 +32,7 @@ #include "tgsi/tgsi_exec.h" -struct sp_sampler_varient; +struct sp_sampler_variant; typedef void (*wrap_nearest_func)(const float s[4], unsigned size, @@ -44,7 +44,7 @@ typedef void (*wrap_linear_func)(const float s[4], int icoord1[4], float w[4]); -typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler, +typedef float (*compute_lambda_func)(const struct sp_sampler_variant *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE]); @@ -64,7 +64,11 @@ union sp_sampler_key { unsigned is_pot:1; unsigned processor:2; unsigned unit:4; - unsigned pad:22; + unsigned swizzle_r:3; + unsigned swizzle_g:3; + unsigned swizzle_b:3; + unsigned swizzle_a:3; + unsigned pad:10; } bits; unsigned value; }; @@ -72,7 +76,7 @@ union sp_sampler_key { /** * Subclass of tgsi_sampler */ -struct sp_sampler_varient +struct sp_sampler_variant { struct tgsi_sampler base; /**< base class */ @@ -85,7 +89,7 @@ struct sp_sampler_varient /* Currently bound texture: */ - const struct pipe_resource *texture; + const struct pipe_sampler_view *view; struct softpipe_tex_tile_cache *cache; unsigned processor; @@ -113,32 +117,33 @@ struct sp_sampler_varient filter_func mip_filter; filter_func compare; + filter_func sample_target; /* Linked list: */ - struct sp_sampler_varient *next; + struct sp_sampler_variant *next; }; struct sp_sampler; -/* Create a sampler varient for a given set of non-orthogonal state. Currently the +/* Create a sampler variant for a given set of non-orthogonal state. 
Currently the */ -struct sp_sampler_varient * -sp_create_sampler_varient( const struct pipe_sampler_state *sampler, +struct sp_sampler_variant * +sp_create_sampler_variant( const struct pipe_sampler_state *sampler, const union sp_sampler_key key ); -void sp_sampler_varient_bind_texture( struct sp_sampler_varient *varient, - struct softpipe_tex_tile_cache *tex_cache, - const struct pipe_resource *tex ); +void sp_sampler_variant_bind_view( struct sp_sampler_variant *variant, + struct softpipe_tex_tile_cache *tex_cache, + const struct pipe_sampler_view *view ); -void sp_sampler_varient_destroy( struct sp_sampler_varient * ); +void sp_sampler_variant_destroy( struct sp_sampler_variant * ); -static INLINE struct sp_sampler_varient * -sp_sampler_varient(const struct tgsi_sampler *sampler) +static INLINE struct sp_sampler_variant * +sp_sampler_variant(const struct tgsi_sampler *sampler) { - return (struct sp_sampler_varient *) sampler; + return (struct sp_sampler_variant *) sampler; } extern void diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index 1393164150..e589ee7c84 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -251,6 +251,7 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, tc->tex_level != addr.bits.level || tc->tex_z != addr.bits.z) { /* get new transfer (view into texture) */ + unsigned width, height, layer; if (tc->tex_trans) { if (tc->tex_trans_map) { @@ -262,14 +263,22 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, tc->tex_trans = NULL; } + width = u_minify(tc->texture->width0, addr.bits.level); + if (tc->texture->target == PIPE_TEXTURE_1D_ARRAY) { + height = tc->texture->array_size; + layer = 0; + } + else { + height = u_minify(tc->texture->height0, addr.bits.level); + layer = addr.bits.face + addr.bits.z; + } + tc->tex_trans = pipe_get_transfer(tc->pipe, tc->texture, addr.bits.level, - 
addr.bits.face + addr.bits.z, + layer, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, - 0, 0, - u_minify(tc->texture->width0, addr.bits.level), - u_minify(tc->texture->height0, addr.bits.level)); + 0, 0, width, height); tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans); @@ -278,45 +287,21 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, tc->tex_z = addr.bits.z; } - /* get tile from the transfer (view into texture) */ - pipe_get_tile_swizzle(tc->pipe, - tc->tex_trans, - addr.bits.x * TILE_SIZE, - addr.bits.y * TILE_SIZE, - TILE_SIZE, - TILE_SIZE, - tc->swizzle_r, - tc->swizzle_g, - tc->swizzle_b, - tc->swizzle_a, - tc->format, - (float *) tile->data.color); + /* Get tile from the transfer (view into texture), explicitly passing + * the image format. + */ + pipe_get_tile_rgba_format(tc->pipe, + tc->tex_trans, + addr.bits.x * TILE_SIZE, + addr.bits.y * TILE_SIZE, + TILE_SIZE, + TILE_SIZE, + tc->format, + (float *) tile->data.color); + tile->addr = addr; } tc->last_tile = tile; return tile; } - - - -/** - * Return the swizzled border color. 
- */ -const float * -sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc, - const float border_color[4]) -{ - float rgba01[6]; - - COPY_4V(rgba01, border_color); - rgba01[PIPE_SWIZZLE_ZERO] = 0.0f; - rgba01[PIPE_SWIZZLE_ONE] = 1.0f; - - tc->swz_border_color[0] = rgba01[tc->swizzle_r]; - tc->swz_border_color[1] = rgba01[tc->swizzle_g]; - tc->swz_border_color[2] = rgba01[tc->swizzle_b]; - tc->swz_border_color[3] = rgba01[tc->swizzle_a]; - - return tc->swz_border_color; -} diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h index e0b66bf3f7..9bced37990 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -92,11 +92,9 @@ struct softpipe_tex_tile_cache unsigned swizzle_g; unsigned swizzle_b; unsigned swizzle_a; - unsigned format; + enum pipe_format format; struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */ - - float swz_border_color[4]; /**< swizzled border color */ }; @@ -161,10 +159,5 @@ sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc, } -const float * -sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc, - const float border_color[4]); - - #endif /* SP_TEX_TILE_CACHE_H */ diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 509d9982b1..95374c34ec 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -62,13 +62,21 @@ softpipe_resource_layout(struct pipe_screen *screen, unsigned buffer_size = 0; for (level = 0; level <= pt->last_level; level++) { + unsigned slices; + + if (pt->target == PIPE_TEXTURE_CUBE) + slices = 6; + else if (pt->target == PIPE_TEXTURE_3D) + slices = depth; + else + slices = pt->array_size; + spr->stride[level] = util_format_get_stride(pt->format, width); spr->level_offset[level] = buffer_size; buffer_size += (util_format_get_nblocksy(pt->format, height) 
* - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - spr->stride[level]); + slices * spr->stride[level]); width = u_minify(width, 1); height = u_minify(height, 1); @@ -227,9 +235,13 @@ sp_get_tex_image_offset(const struct softpipe_resource *spr, unsigned offset = spr->level_offset[level]; if (spr->base.target == PIPE_TEXTURE_CUBE || - spr->base.target == PIPE_TEXTURE_3D) { + spr->base.target == PIPE_TEXTURE_3D || + spr->base.target == PIPE_TEXTURE_2D_ARRAY) { offset += layer * nblocksy * spr->stride[level]; } + else if (spr->base.target == PIPE_TEXTURE_1D_ARRAY) { + offset += layer * spr->stride[level]; + } else { assert(layer == 0); } @@ -292,7 +304,7 @@ softpipe_surface_destroy(struct pipe_context *pipe, * a resource object. * \param pipe rendering context * \param resource the resource to transfer in/out of - * \param sr indicates cube face or 3D texture slice + * \param level which mipmap level * \param usage bitmask of PIPE_TRANSFER_x flags * \param box the 1D/2D/3D region of interest */ @@ -311,8 +323,21 @@ softpipe_get_transfer(struct pipe_context *pipe, /* make sure the requested region is in the image bounds */ assert(box->x + box->width <= u_minify(resource->width0, level)); - assert(box->y + box->height <= u_minify(resource->height0, level)); - assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1)); + if (resource->target == PIPE_TEXTURE_1D_ARRAY) { + assert(box->y + box->height <= resource->array_size); + } + else { + assert(box->y + box->height <= u_minify(resource->height0, level)); + if (resource->target == PIPE_TEXTURE_2D_ARRAY) { + assert(box->z + box->depth <= resource->array_size); + } + else if (resource->target == PIPE_TEXTURE_CUBE) { + assert(box->z < 6); + } + else { + assert(box->z + box->depth <= (u_minify(resource->depth0, level))); + } + } /* * Transfers, like other pipe operations, must happen in order, so flush the diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c 
b/src/gallium/drivers/softpipe/sp_tile_cache.c index 480860af63..60870b8bee 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -357,11 +357,12 @@ sp_flush_tile(struct softpipe_tile_cache* tc, unsigned pos) tc->entries[pos]->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(tc->pipe, tc->transfer, - tc->tile_addrs[pos].bits.x * TILE_SIZE, - tc->tile_addrs[pos].bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - (float *) tc->entries[pos]->data.color); + pipe_put_tile_rgba_format(tc->pipe, tc->transfer, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + tc->surface->format, + (float *) tc->entries[pos]->data.color); } tc->tile_addrs[pos].bits.invalid = 1; /* mark as empty */ } @@ -468,11 +469,12 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(tc->pipe, pt, - tc->tile_addrs[pos].bits.x * TILE_SIZE, - tc->tile_addrs[pos].bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - (float *) tile->data.color); + pipe_put_tile_rgba_format(tc->pipe, pt, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + tc->surface->format, + (float *) tile->data.color); } } diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c index 05eab8a517..1ed1d5d25b 100644 --- a/src/gallium/drivers/svga/svga_cmd.c +++ b/src/gallium/drivers/svga/svga_cmd.c @@ -422,7 +422,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, struct svga_transfer *st, // IN SVGA3dTransferType transfer, // IN const SVGA3dCopyBox *boxes, // IN - uint32 numBoxes) // IN + uint32 numBoxes, // IN + SVGA3dSurfaceDMAFlags flags) // IN { struct svga_texture *texture = svga_texture(st->base.resource); SVGA3dCmdSurfaceDMA *cmd; @@ -465,7 +466,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof 
*cmd + boxesSize); pSuffix->suffixSize = sizeof *pSuffix; pSuffix->maximumOffset = st->hw_nblocksy*st->base.stride; - memset(&pSuffix->flags, 0, sizeof pSuffix->flags); + pSuffix->flags = flags; swc->commit(swc); diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h index 0e568d78e6..223ab17df8 100644 --- a/src/gallium/drivers/svga/svga_cmd.h +++ b/src/gallium/drivers/svga/svga_cmd.h @@ -102,7 +102,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, struct svga_transfer *st, SVGA3dTransferType transfer, const SVGA3dCopyBox *boxes, - uint32 numBoxes); + uint32 numBoxes, + SVGA3dSurfaceDMAFlags flags); enum pipe_error SVGA3D_BufferDMA(struct svga_winsys_context *swc, diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 1e513f1039..4782b4bf70 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -34,6 +34,7 @@ #include "svga_context.h" #include "svga_screen.h" +#include "svga_surface.h" #include "svga_resource_texture.h" #include "svga_resource_buffer.h" #include "svga_resource.h" @@ -43,6 +44,12 @@ #include "svga_debug.h" #include "svga_state.h" +DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(use_min_mipmap, "SVGA_USE_MIN_MIPMAP", FALSE); +DEBUG_GET_ONCE_NUM_OPTION(disable_shader, "SVGA_DISABLE_SHADER", ~0); +DEBUG_GET_ONCE_BOOL_OPTION(no_line_width, "SVGA_NO_LINE_WIDTH", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE", FALSE); static void svga_destroy( struct pipe_context *pipe ) { @@ -113,13 +120,12 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, /* debug */ - svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE); - svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE); - svga->debug.use_min_mipmap = 
debug_get_bool_option("SVGA_USE_MIN_MIPMAP", FALSE); - svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0); - - if (!svga_init_swtnl(svga)) - goto no_swtnl; + svga->debug.no_swtnl = debug_get_option_no_swtnl(); + svga->debug.force_swtnl = debug_get_option_force_swtnl(); + svga->debug.use_min_mipmap = debug_get_option_use_min_mipmap(); + svga->debug.disable_shader = debug_get_option_disable_shader(); + svga->debug.no_line_width = debug_get_option_no_line_width(); + svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple(); svga->fs_bm = util_bitmask_create(); if (svga->fs_bm == NULL) @@ -149,6 +155,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, if (svga->hwtnl == NULL) goto no_hwtnl; + if (!svga_init_swtnl(svga)) + goto no_swtnl; ret = svga_emit_initial_state( svga ); if (ret) @@ -171,6 +179,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, return &svga->pipe; no_state: + svga_destroy_swtnl(svga); +no_swtnl: svga_hwtnl_destroy( svga->hwtnl ); no_hwtnl: u_upload_destroy( svga->upload_vb ); @@ -181,8 +191,6 @@ no_upload_ib: no_vs_bm: util_bitmask_destroy( svga->fs_bm ); no_fs_bm: - svga_destroy_swtnl(svga); -no_swtnl: svga->swc->destroy(svga->swc); no_swc: FREE(svga); @@ -196,14 +204,10 @@ void svga_context_flush( struct svga_context *svga, { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); struct pipe_fence_handle *fence = NULL; + enum pipe_error ret; svga->curr.nr_fbs = 0; - /* Unmap upload manager buffers: - */ - u_upload_flush(svga->upload_vb); - u_upload_flush(svga->upload_ib); - /* Ensure that texture dma uploads are processed * before submitting commands. */ @@ -220,9 +224,25 @@ void svga_context_flush( struct svga_context *svga, */ svga->dirty |= SVGA_NEW_COMMAND_BUFFER; + /* + * We must reemit the surface bindings here, because svga_update_state + * will always flush the primitives before processing the + * SVGA_NEW_COMMAND_BUFFER state change. 
+ * + * TODO: Refactor this. + */ + ret = svga_reemit_framebuffer_bindings(svga); + assert(ret == PIPE_OK); + + ret = svga_reemit_tss_bindings(svga); + assert(ret == PIPE_OK); + + svga->dirty &= ~SVGA_NEW_COMMAND_BUFFER; + if (SVGA_DEBUG & DEBUG_SYNC) { if (fence) - svga->pipe.screen->fence_finish( svga->pipe.screen, fence, 0); + svga->pipe.screen->fence_finish( svga->pipe.screen, fence, + PIPE_TIMEOUT_INFINITE); } if(pfence) @@ -245,6 +265,30 @@ void svga_hwtnl_flush_retry( struct svga_context *svga ) assert(ret == 0); } + +/* Emit all operations pending on host surfaces. + */ +void svga_surfaces_flush(struct svga_context *svga) +{ + unsigned i; + + /* Emit buffered drawing commands. + */ + svga_hwtnl_flush_retry( svga ); + + /* Emit back-copy from render target view to texture. + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (svga->curr.framebuffer.cbufs[i]) + svga_propagate_surface(svga, svga->curr.framebuffer.cbufs[i]); + } + + if (svga->curr.framebuffer.zsbuf) + svga_propagate_surface(svga, svga->curr.framebuffer.zsbuf); + +} + + struct svga_winsys_context * svga_winsys_context( struct pipe_context *pipe ) { diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 04e281a506..7b36a3606e 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -35,6 +35,8 @@ #include "tgsi/tgsi_scan.h" +#include "svga_state.h" + #define SVGA_TEX_UNITS 8 #define SVGA_MAX_POINTSIZE 80.0 @@ -147,7 +149,14 @@ struct svga_rasterizer_state { float pointsize; unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */ - unsigned need_pipeline:16; /* which prims do we need help for? */ + + /** Which prims do we need help for? 
Bitmask of (1 << PIPE_PRIM_x) flags */ + unsigned need_pipeline:16; + + /** For debugging: */ + const char* need_pipeline_tris_str; + const char* need_pipeline_lines_str; + const char* need_pipeline_points_str; }; struct svga_sampler_state { @@ -237,7 +246,7 @@ struct svga_prescale { }; -/* Updated by calling svga_update_state( SVGA_STATE_HW_VIEWPORT ) +/* Updated by calling svga_update_state( SVGA_STATE_HW_CLEAR ) */ struct svga_hw_clear_state { @@ -288,6 +297,11 @@ struct svga_sw_state boolean need_swvfetch; boolean need_pipeline; boolean need_swtnl; + + /* Flag to make sure that need sw is on while + * updating state within a swtnl call. + */ + boolean in_swtnl_draw; }; @@ -312,6 +326,9 @@ struct svga_context unsigned shader_id; unsigned disable_shader; + + boolean no_line_width; + boolean force_hw_line_stipple; } debug; struct { @@ -327,7 +344,7 @@ struct svga_context struct util_bitmask *vs_bm; struct { - unsigned dirty[4]; + unsigned dirty[SVGA_STATE_MAX]; unsigned texture_timestamp; @@ -350,6 +367,9 @@ struct svga_context /** List of buffers with queued transfers */ struct list_head dirty_buffers; + + /** Was the previous draw done with the SW path? 
*/ + boolean prev_draw_swtnl; }; /* A flag for each state_tracker state object: @@ -433,6 +453,8 @@ void svga_context_flush( struct svga_context *svga, void svga_hwtnl_flush_retry( struct svga_context *svga ); +void svga_surfaces_flush(struct svga_context *svga); + struct pipe_context * svga_context_create(struct pipe_screen *screen, void *priv); diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 81dd4778d0..2c873a0f7a 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -28,6 +28,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_upload_mgr.h" #include "svga_context.h" #include "svga_draw.h" @@ -143,6 +144,9 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) SVGA3dPrimitiveRange *prim; unsigned i; + /* Unmap upload manager vertex buffers */ + u_upload_flush(svga->upload_vb); + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]); if (handle == NULL) @@ -151,6 +155,9 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) vb_handle[i] = handle; } + /* Unmap upload manager index buffers */ + u_upload_flush(svga->upload_ib); + for (i = 0; i < hwtnl->cmd.prim_count; i++) { if (hwtnl->cmd.prim_ib[i]) { handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]); @@ -315,7 +322,6 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, break; } - assert(!stride || width <= stride); if (max_index != ~0) { assert(offset + (index_bias + max_index) * stride + width <= size); } diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index da33fae62f..a6518042eb 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -53,6 +53,7 @@ static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl, dst = pipe_buffer_create( pipe->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, size ); if (dst 
== NULL) goto fail; @@ -65,14 +66,14 @@ static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl, generate( nr, dst_map ); - pipe_buffer_unmap( pipe, dst, transfer ); + pipe_buffer_unmap( pipe, transfer ); *out_buf = dst; return PIPE_OK; fail: if (dst_map) - pipe_buffer_unmap( pipe, dst, transfer ); + pipe_buffer_unmap( pipe, transfer ); if (dst) pipe->screen->resource_destroy( pipe->screen, dst ); diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c index c4579177b7..7d420c6b29 100644 --- a/src/gallium/drivers/svga/svga_draw_elements.c +++ b/src/gallium/drivers/svga/svga_draw_elements.c @@ -56,6 +56,7 @@ translate_indices( struct svga_hwtnl *hwtnl, dst = pipe_buffer_create( pipe->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, size ); if (dst == NULL) goto fail; @@ -72,18 +73,18 @@ translate_indices( struct svga_hwtnl *hwtnl, nr, dst_map ); - pipe_buffer_unmap( pipe, src, src_transfer ); - pipe_buffer_unmap( pipe, dst, dst_transfer ); + pipe_buffer_unmap( pipe, src_transfer ); + pipe_buffer_unmap( pipe, dst_transfer ); *out_buf = dst; return PIPE_OK; fail: if (src_map) - pipe_buffer_unmap( pipe, src, src_transfer ); + pipe_buffer_unmap( pipe, src_transfer ); if (dst_map) - pipe_buffer_unmap( pipe, dst, dst_transfer ); + pipe_buffer_unmap( pipe, dst_transfer ); if (dst) pipe->screen->resource_destroy( pipe->screen, dst ); @@ -120,14 +121,17 @@ svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, if (index_buffer && svga_buffer_is_user_buffer(index_buffer)) { + boolean flushed; assert( index_buffer->width0 >= index_offset + count * index_size ); ret = u_upload_buffer( hwtnl->upload_ib, + 0, index_offset, count * index_size, index_buffer, &index_offset, - &upload_buffer ); + &upload_buffer, + &flushed ); if (ret) goto done; diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c index 426698806c..c87afb6946 100644 --- 
a/src/gallium/drivers/svga/svga_pipe_blit.c +++ b/src/gallium/drivers/svga/svga_pipe_blit.c @@ -50,7 +50,9 @@ static void svga_surface_copy(struct pipe_context *pipe, struct pipe_surface *srcsurf, *dstsurf;*/ unsigned dst_face, dst_z, src_face, src_z; - svga_hwtnl_flush_retry( svga ); + /* Emit buffered drawing commands, and any back copies. + */ + svga_surfaces_flush( svga ); #if 0 srcsurf = screen->get_tex_surface(screen, src_tex, diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 001ec3616c..fda5c28433 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -157,6 +157,14 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (!u_trim_pipe_prim( info->mode, &count )) return; + if (svga->state.sw.need_swtnl != svga->prev_draw_swtnl) { + /* We're switching between SW and HW drawing. Do a flush to avoid + * mixing HW and SW rendering with the same vertex buffer. + */ + pipe->flush(pipe, NULL); + svga->prev_draw_swtnl = svga->state.sw.need_swtnl; + } + /* * Mark currently bound target surfaces as dirty * doesn't really matter if it is done before drawing. diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c index ab243aa6ec..4578c136cb 100644 --- a/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -24,6 +24,7 @@ **********************************************************/ #include "pipe/p_defines.h" +#include "util/u_string.h" #include "svga_screen.h" #include "svga_surface.h" #include "svga_context.h" @@ -31,31 +32,40 @@ static void svga_flush( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence ) { struct svga_context *svga = svga_context(pipe); - int i; - /* Emit buffered drawing commands. + /* Emit buffered drawing commands, and any back copies. 
*/ - svga_hwtnl_flush_retry( svga ); - - /* Emit back-copy from render target view to texture. - */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - if (svga->curr.framebuffer.cbufs[i]) - svga_propagate_surface(pipe, svga->curr.framebuffer.cbufs[i]); - } - if (svga->curr.framebuffer.zsbuf) - svga_propagate_surface(pipe, svga->curr.framebuffer.zsbuf); + svga_surfaces_flush( svga ); /* Flush command queue. */ svga_context_flush(svga, fence); - SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s flags %x fence_ptr %p\n", - __FUNCTION__, flags, fence ? *fence : 0x0); + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n", + __FUNCTION__, fence ? *fence : 0x0); + + /* Enable to dump BMPs of the color/depth buffers each frame */ + if (0) { + struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; + static unsigned frame_no = 1; + char filename[256]; + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%04u", i, frame_no); + debug_dump_surface_bmp(&svga->pipe, filename, fb->cbufs[i]); + } + + if (0 && fb->zsbuf) { + util_snprintf(filename, sizeof(filename), "zsbuf_%04u", frame_no); + debug_dump_surface_bmp(&svga->pipe, filename, fb->zsbuf); + } + + ++frame_no; + } } diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c index 8c24fb302f..440919c626 100644 --- a/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/src/gallium/drivers/svga/svga_pipe_misc.c @@ -94,7 +94,7 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i]) - svga_propagate_surface(pipe, dst->cbufs[i]); + svga_propagate_surface(svga, dst->cbufs[i]); } /* XXX: Actually the virtual hardware may support rendertargets with diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 660eb0757a..4a1a37f176 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ 
b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -64,18 +64,19 @@ static void * svga_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *templ) { + struct svga_context *svga = svga_context(pipe); struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state ); + /* need this for draw module. */ rast->templ = *templ; - /* light_twoside - XXX: need fragment shader varient */ + /* light_twoside - XXX: need fragment shader variant */ /* poly_smooth - XXX: no fallback available */ /* poly_stipple_enable - draw module */ /* sprite_coord_enable - ? */ /* point_quad_rasterization - ? */ /* point_size_per_vertex - ? */ /* sprite_coord_mode - ??? */ - /* bypass_vs_viewport_and_clip - handled by viewport setup */ /* flatshade_first - handled by index translation */ /* gl_rasterization_rules - XXX - viewport code */ /* line_width - draw module */ @@ -93,17 +94,22 @@ svga_create_rasterizer_state(struct pipe_context *pipe, /* Use swtnl + decomposition implement these: */ - if (templ->poly_stipple_enable) + if (templ->poly_stipple_enable) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "poly stipple"; + } - if (templ->line_width != 1.0 && - templ->line_width != 0.0) + if (templ->line_width >= 1.5f && + !svga->debug.no_line_width) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + rast->need_pipeline_lines_str = "line width"; + } if (templ->line_stipple_enable) { - /* LinePattern not implemented on all backends. + /* XXX: LinePattern not implemented on all backends, and there is no + * mechanism to query it. 
*/ - if (0) { + if (!svga->debug.force_hw_line_stipple) { SVGA3dLinePattern lp; lp.repeat = templ->line_stipple_factor + 1; lp.pattern = templ->line_stipple_pattern; @@ -111,11 +117,19 @@ svga_create_rasterizer_state(struct pipe_context *pipe, } else { rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + rast->need_pipeline_lines_str = "line stipple"; } } - if (templ->point_smooth) + if (templ->point_smooth) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS; + rast->need_pipeline_points_str = "smooth points"; + } + + if (templ->line_smooth) { + rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + rast->need_pipeline_lines_str = "smooth lines"; + } { int fill_front = templ->fill_front; @@ -148,6 +162,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, * front/back fill modes: */ rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "different front/back fillmodes"; } else { offset = offset_front; @@ -172,6 +187,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "unfilled primitives with no index manipulation"; } /* If we are decomposing to lines, and lines need the pipeline, @@ -182,6 +198,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "decomposing lines"; } /* Similarly for points: @@ -191,6 +208,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "decomposing points"; } if (offset) { @@ -201,9 +219,6 @@ svga_create_rasterizer_state(struct pipe_context *pipe, rast->hw_unfilled = fill; } - - - if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) { /* Turn off stuff which will get done in the draw module: */ diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c 
b/src/gallium/drivers/svga/svga_pipe_sampler.c index f44a0e1325..446fcc4407 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -144,8 +144,9 @@ svga_create_sampler_state(struct pipe_context *pipe, return cso; } -static void svga_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) +static void +svga_bind_fragment_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct svga_context *svga = svga_context(pipe); unsigned i; @@ -203,9 +204,10 @@ svga_sampler_view_destroy(struct pipe_context *pipe, FREE(view); } -static void svga_set_sampler_views(struct pipe_context *pipe, - unsigned num, - struct pipe_sampler_view **views) +static void +svga_set_fragment_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) { struct svga_context *svga = svga_context(pipe); unsigned flag_1d = 0; @@ -256,9 +258,9 @@ static void svga_set_sampler_views(struct pipe_context *pipe, void svga_init_sampler_functions( struct svga_context *svga ) { svga->pipe.create_sampler_state = svga_create_sampler_state; - svga->pipe.bind_fragment_sampler_states = svga_bind_sampler_states; + svga->pipe.bind_fragment_sampler_states = svga_bind_fragment_sampler_states; svga->pipe.delete_sampler_state = svga_delete_sampler_state; - svga->pipe.set_fragment_sampler_views = svga_set_sampler_views; + svga->pipe.set_fragment_sampler_views = svga_set_fragment_sampler_views; svga->pipe.create_sampler_view = svga_create_sampler_view; svga->pipe.sampler_view_destroy = svga_sampler_view_destroy; } diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 86c79459f3..5846991073 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -27,6 +27,7 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include 
"tgsi/tgsi_parse.h" #include "svga_screen.h" diff --git a/src/gallium/drivers/svga/svga_resource.c b/src/gallium/drivers/svga/svga_resource.c index ef2a0c40f0..6e0622a312 100644 --- a/src/gallium/drivers/svga/svga_resource.c +++ b/src/gallium/drivers/svga/svga_resource.c @@ -33,13 +33,13 @@ svga_resource_from_handle(struct pipe_screen * screen, void svga_init_resource_functions(struct svga_context *svga) { - svga->pipe.is_resource_referenced = u_is_resource_referenced_vtbl; svga->pipe.get_transfer = u_get_transfer_vtbl; svga->pipe.transfer_map = u_transfer_map_vtbl; svga->pipe.transfer_flush_region = u_transfer_flush_region_vtbl; svga->pipe.transfer_unmap = u_transfer_unmap_vtbl; svga->pipe.transfer_destroy = u_transfer_destroy_vtbl; svga->pipe.transfer_inline_write = u_transfer_inline_write_vtbl; + svga->pipe.redefine_user_buffer = svga_redefine_user_buffer; } void diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index f12e2b6862..2d7c524d86 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -51,53 +51,104 @@ svga_buffer_needs_hw_storage(unsigned usage) } -static unsigned int -svga_buffer_is_referenced( struct pipe_context *pipe, - struct pipe_resource *buf, - unsigned level, int layer) +/** + * Map a range of a buffer. + * + * Unlike texture DMAs (which are written immediately to the command buffer and + * therefore inherently serialized with other context operations), for buffers + * we try to coalesce multiple range mappings (i.e, multiple calls to this + * function) into a single DMA command, for better efficiency in command + * processing. This means we need to exercise extra care here to ensure that + * the end result is exactly the same as if one DMA was used for every mapped + * range. 
+ */ +static void * +svga_buffer_map_range( struct pipe_context *pipe, + struct pipe_resource *buf, + unsigned offset, + unsigned length, + unsigned usage ) { + struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); - struct svga_buffer *sbuf = svga_buffer(buf); - - /** - * XXX: Check this. - * The screen may cache buffer writes, but when we map, we map out - * of those cached writes, so we don't need to set a - * PIPE_REFERENCED_FOR_WRITE flag for cached buffers. - */ - - if (!sbuf->handle || ss->sws->surface_is_flushed(ss->sws, sbuf->handle)) - return PIPE_UNREFERENCED; - - /** - * sws->surface_is_flushed() does not distinguish between read references - * and write references. So assume a reference is both, - * however, we make an exception for index- and vertex buffers, to avoid - * a flush in st_bufferobj_get_subdata, during display list replay. - */ + struct svga_winsys_screen *sws = ss->sws; + struct svga_buffer *sbuf = svga_buffer( buf ); + void *map; - if (sbuf->b.b.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - return PIPE_REFERENCED_FOR_READ; + if (usage & PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + /* + * Finish writing any pending DMA commands, and tell the host to discard + * the buffer contents on the next DMA operation. + */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} + if (sbuf->dma.pending) { + svga_buffer_upload_flush(svga, sbuf); + /* + * Instead of flushing the context command buffer, simply discard + * the current hwbuf, and start a new one. + */ + svga_buffer_destroy_hw_storage(ss, sbuf); + } + sbuf->map.num_ranges = 0; + sbuf->dma.flags.discard = TRUE; + } + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { + if (!sbuf->map.num_ranges) { + /* + * No pending ranges to upload so far, so we can tell the host to + * not synchronize on the next DMA command. 
+ */ + sbuf->dma.flags.unsynchronized = TRUE; + } + } else { + /* + * Synchronizing, so finish writing any pending DMA command, and + * ensure the next DMA will be done in order. + */ -static void * -svga_buffer_map_range( struct pipe_screen *screen, - struct pipe_resource *buf, - unsigned offset, - unsigned length, - unsigned usage ) -{ - struct svga_screen *ss = svga_screen(screen); - struct svga_winsys_screen *sws = ss->sws; - struct svga_buffer *sbuf = svga_buffer( buf ); - void *map; + if (sbuf->dma.pending) { + svga_buffer_upload_flush(svga, sbuf); + + if (sbuf->hwbuf) { + /* + * We have a pending DMA upload from a hardware buffer, therefore + * we need to ensure that the host finishes processing that DMA + * command before the state tracker can start overwriting the + * hardware buffer. + * + * XXX: This could be avoided by tying the hardware buffer to + * the transfer (just as done with textures), which would allow + * overlapping DMAs commands to be queued on the same context + * buffer. However, due to the likelihood of software vertex + * processing, it is more convenient to hold on to the hardware + * buffer, allowing to quickly access the contents from the CPU + * without having to do a DMA download from the host. + */ + + if (usage & PIPE_TRANSFER_DONTBLOCK) { + /* + * Flushing the command buffer here will most likely cause + * the map of the hwbuf below to block, so preemptively + * return NULL here if DONTBLOCK is set to prevent unnecessary + * command buffer flushes. + */ + + return NULL; + } + + svga_context_flush(svga, NULL); + } + } + + sbuf->dma.flags.unsynchronized = FALSE; + } + } if (!sbuf->swbuf && !sbuf->hwbuf) { if (svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) { @@ -105,9 +156,12 @@ svga_buffer_map_range( struct pipe_screen *screen, * We can't create a hardware buffer big enough, so create a malloc * buffer instead. 
*/ - debug_printf("%s: failed to allocate %u KB of DMA, splitting DMA transfers\n", - __FUNCTION__, - (sbuf->b.b.width0 + 1023)/1024); + if (0) { + debug_printf("%s: failed to allocate %u KB of DMA, " + "splitting DMA transfers\n", + __FUNCTION__, + (sbuf->b.b.width0 + 1023)/1024); + } sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16); } @@ -141,12 +195,12 @@ svga_buffer_map_range( struct pipe_screen *screen, static void -svga_buffer_flush_mapped_range( struct pipe_screen *screen, +svga_buffer_flush_mapped_range( struct pipe_context *pipe, struct pipe_resource *buf, unsigned offset, unsigned length) { struct svga_buffer *sbuf = svga_buffer( buf ); - struct svga_screen *ss = svga_screen(screen); + struct svga_screen *ss = svga_screen(pipe->screen); pipe_mutex_lock(ss->swc_mutex); assert(sbuf->map.writing); @@ -158,10 +212,10 @@ svga_buffer_flush_mapped_range( struct pipe_screen *screen, } static void -svga_buffer_unmap( struct pipe_screen *screen, +svga_buffer_unmap( struct pipe_context *pipe, struct pipe_resource *buf) { - struct svga_screen *ss = svga_screen(screen); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_buffer *sbuf = svga_buffer( buf ); @@ -174,11 +228,18 @@ svga_buffer_unmap( struct pipe_screen *screen, if(sbuf->hwbuf) sws->buffer_unmap(sws, sbuf->hwbuf); - if(sbuf->map.writing) { - if(!sbuf->map.flush_explicit) { - /* No mapped range was flushed -- flush the whole buffer */ + if (sbuf->map.writing) { + if (!sbuf->map.flush_explicit) { + /* + * Mapped range not flushed explicitly, so flush the whole buffer, + * and tell the host to discard the contents when processing the DMA + * command. 
+ */ + SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n"); + sbuf->dma.flags.discard = TRUE; + svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0); } @@ -225,7 +286,7 @@ static void * svga_buffer_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) { - uint8_t *map = svga_buffer_map_range( pipe->screen, + uint8_t *map = svga_buffer_map_range( pipe, transfer->resource, transfer->box.x, transfer->box.width, @@ -248,7 +309,7 @@ static void svga_buffer_transfer_flush_region( struct pipe_context *pipe, { assert(box->x + box->width <= transfer->box.width); - svga_buffer_flush_mapped_range(pipe->screen, + svga_buffer_flush_mapped_range(pipe, transfer->resource, transfer->box.x + box->x, box->width); @@ -257,7 +318,7 @@ static void svga_buffer_transfer_flush_region( struct pipe_context *pipe, static void svga_buffer_transfer_unmap( struct pipe_context *pipe, struct pipe_transfer *transfer ) { - svga_buffer_unmap(pipe->screen, + svga_buffer_unmap(pipe, transfer->resource); } @@ -271,7 +332,6 @@ struct u_resource_vtbl svga_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ svga_buffer_destroy, /* resource_destroy */ - svga_buffer_is_referenced, /* is_resource_referenced */ u_default_get_transfer, /* get_transfer */ u_default_transfer_destroy, /* transfer_destroy */ svga_buffer_transfer_map, /* transfer_map */ @@ -308,6 +368,9 @@ svga_buffer_create(struct pipe_screen *screen, goto error2; } + debug_reference(&sbuf->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); + return &sbuf->b.b; error2: @@ -341,6 +404,9 @@ svga_user_buffer_create(struct pipe_screen *screen, sbuf->swbuf = ptr; sbuf->user = TRUE; + + debug_reference(&sbuf->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); return &sbuf->b.b; diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index d3ec11bfd5..c559f70ec1 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ 
b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -243,4 +243,10 @@ svga_winsys_buffer_create(struct svga_context *svga, unsigned usage, unsigned size); +void +svga_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size); + #endif /* SVGA_BUFFER_H */ diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 3de5216a94..0bfa8a14a6 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -40,6 +40,9 @@ #include "svga_debug.h" +#define MAX_DMA_SIZE (4 * 1024 * 1024) + + /** * Allocate a winsys_buffer (ie. DMA, aka GMR memory). * @@ -57,6 +60,13 @@ svga_winsys_buffer_create( struct svga_context *svga, struct svga_winsys_screen *sws = svgascreen->sws; struct svga_winsys_buffer *buf; + /* XXX this shouldn't be a hard-coded number; it should be queried + * somehow. + */ + if (size > MAX_DMA_SIZE) { + return NULL; + } + /* Just try */ buf = sws->buffer_create(sws, alignment, usage, size); if(!buf) { @@ -242,12 +252,17 @@ svga_buffer_upload_command(struct svga_context *svga, * Patch up the upload DMA command reserved by svga_buffer_upload_command * with the final ranges. 
*/ -static void +void svga_buffer_upload_flush(struct svga_context *svga, struct svga_buffer *sbuf) { SVGA3dCopyBox *boxes; unsigned i; + struct pipe_resource *dummy; + + if (!sbuf->dma.pending) { + return; + } assert(sbuf->handle); assert(sbuf->hwbuf); @@ -285,17 +300,18 @@ svga_buffer_upload_flush(struct svga_context *svga, sbuf->head.next = sbuf->head.prev = NULL; #endif sbuf->dma.pending = FALSE; + sbuf->dma.flags.discard = FALSE; + sbuf->dma.flags.unsynchronized = FALSE; sbuf->dma.svga = NULL; sbuf->dma.boxes = NULL; - /* Decrement reference count */ - pipe_reference(&(sbuf->b.b.reference), NULL); - sbuf = NULL; + /* Decrement reference count (and potentially destroy) */ + dummy = &sbuf->b.b; + pipe_resource_reference(&dummy, NULL); } - /** * Note a dirty range. * @@ -326,12 +342,6 @@ svga_buffer_add_range(struct svga_buffer *sbuf, /* * Try to grow one of the ranges. - * - * Note that it is not this function task to care about overlapping ranges, - * as the GMR was already given so it is too late to do anything. Situations - * where overlapping ranges may pose a problem should be detected via - * pipe_context::is_resource_referenced and the context that refers to the - * buffer should be flushed. */ for(i = 0; i < sbuf->map.num_ranges; ++i) { @@ -346,6 +356,11 @@ svga_buffer_add_range(struct svga_buffer *sbuf, if (dist <= 0) { /* * Ranges are contiguous or overlapping -- extend this one and return. + * + * Note that it is not this function's task to prevent overlapping + * ranges, as the GMR was already given so it is too late to do + * anything. If the ranges overlap here it must surely be because + * PIPE_TRANSFER_UNSYNCHRONIZED was set. */ sbuf->map.ranges[i].start = MIN2(sbuf->map.ranges[i].start, start); @@ -369,8 +384,7 @@ svga_buffer_add_range(struct svga_buffer *sbuf, * pending DMA upload and start clean. 
*/ - if(sbuf->dma.pending) - svga_buffer_upload_flush(sbuf->dma.svga, sbuf); + svga_buffer_upload_flush(sbuf->dma.svga, sbuf); assert(!sbuf->dma.pending); assert(!sbuf->dma.svga); @@ -638,3 +652,54 @@ svga_context_flush_buffers(struct svga_context *svga) next = curr->next; } } + + +void +svga_redefine_user_buffer(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned offset, + unsigned size) +{ + struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_context *svga = svga_context(pipe); + struct svga_buffer *sbuf = svga_buffer(resource); + + assert(sbuf->user); + + /* + * Release any uploaded user buffer. + * + * TODO: As an optimization, we could try to update the uploaded buffer + * instead. + */ + + pipe_resource_reference(&sbuf->uploaded.buffer, NULL); + + pipe_mutex_lock(ss->swc_mutex); + + if (offset + size > resource->width0) { + /* + * User buffers shouldn't have DMA directly, unless + * SVGA_COMBINE_USERBUFFERS is not set. + */ + + if (sbuf->dma.pending) { + svga_buffer_upload_flush(svga, sbuf); + } + + if (sbuf->handle) { + svga_buffer_destroy_host_surface(ss, sbuf); + } + + if (sbuf->hwbuf) { + svga_buffer_destroy_hw_storage(ss, sbuf); + } + + sbuf->key.size.width = sbuf->b.b.width0 = offset + size; + } + + pipe_mutex_unlock(ss->swc_mutex); + + svga->curr.any_user_vertex_buffers = TRUE; + svga->dirty |= SVGA_NEW_VBUFFER | SVGA_NEW_VELEMENT; +} diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.h b/src/gallium/drivers/svga/svga_resource_buffer_upload.h index 11df306526..13d8f3e299 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.h +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.h @@ -28,6 +28,10 @@ void +svga_buffer_upload_flush(struct svga_context *svga, + struct svga_buffer *sbuf); + +void svga_buffer_add_range(struct svga_buffer *sbuf, unsigned start, unsigned end); diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 
7c9e600b9f..b61f85955a 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -48,31 +48,6 @@ #define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9) -static unsigned int -svga_texture_is_referenced( struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned level, int layer) -{ - struct svga_texture *tex = svga_texture(texture); - struct svga_screen *ss = svga_screen(pipe->screen); - - /** - * The screen does not cache texture writes. - */ - - if (!tex->handle || ss->sws->surface_is_flushed(ss->sws, tex->handle)) - return PIPE_UNREFERENCED; - - /** - * sws->surface_is_flushed() does not distinguish between read references - * and write references. So assume a reference is both. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - - - /* * Helper function and arrays */ @@ -146,16 +121,6 @@ svga_translate_format_render(enum pipe_format format) case PIPE_FORMAT_L8_UNORM: return svga_translate_format(format); -#if 1 - /* For on host conversion */ - case PIPE_FORMAT_DXT1_RGB: - return SVGA3D_X8R8G8B8; - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return SVGA3D_A8R8G8B8; -#endif - default: return SVGA3D_FORMAT_INVALID; } @@ -166,7 +131,8 @@ static INLINE void svga_transfer_dma_band(struct svga_context *svga, struct svga_transfer *st, SVGA3dTransferType transfer, - unsigned y, unsigned h, unsigned srcy) + unsigned y, unsigned h, unsigned srcy, + SVGA3dSurfaceDMAFlags flags) { struct svga_texture *texture = svga_texture(st->base.resource); SVGA3dCopyBox box; @@ -202,10 +168,10 @@ svga_transfer_dma_band(struct svga_context *svga, util_format_get_blocksize(texture->b.b.format) * 8 / (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format))); - ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); + ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags); if(ret != PIPE_OK) { - 
svga->swc->flush(svga->swc, NULL); - ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); + svga_context_flush(svga, NULL); + ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags); assert(ret == PIPE_OK); } } @@ -214,7 +180,8 @@ svga_transfer_dma_band(struct svga_context *svga, static INLINE void svga_transfer_dma(struct svga_context *svga, struct svga_transfer *st, - SVGA3dTransferType transfer) + SVGA3dTransferType transfer, + SVGA3dSurfaceDMAFlags flags) { struct svga_texture *texture = svga_texture(st->base.resource); struct svga_screen *screen = svga_screen(texture->b.b.screen); @@ -225,11 +192,17 @@ svga_transfer_dma(struct svga_context *svga, SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__); } + /* Ensure any pending operations on host surfaces are queued on the command + * buffer first. + */ + svga_surfaces_flush( svga ); if(!st->swbuf) { /* Do the DMA transfer in a single go */ - svga_transfer_dma_band(svga, st, transfer, st->base.box.y, st->base.box.height, 0); + svga_transfer_dma_band(svga, st, transfer, + st->base.box.y, st->base.box.height, 0, + flags); if(transfer == SVGA3D_READ_HOST_VRAM) { svga_context_flush(svga, &fence); @@ -275,7 +248,14 @@ svga_transfer_dma(struct svga_context *svga, } } - svga_transfer_dma_band(svga, st, transfer, y, h, srcy); + svga_transfer_dma_band(svga, st, transfer, y, h, srcy, flags); + + /* + * Prevent the texture contents from being discarded on the next band + * upload. 
+ */ + + flags.discard = FALSE; if(transfer == SVGA3D_READ_HOST_VRAM) { svga_context_flush(svga, &fence); @@ -390,18 +370,25 @@ svga_texture_get_transfer(struct pipe_context *pipe, if(st->hw_nblocksy < nblocksy) { /* We couldn't allocate a hardware buffer big enough for the transfer, * so allocate regular malloc memory instead */ - debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n", - __FUNCTION__, - (nblocksy*st->base.stride + 1023)/1024, - (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, - (st->hw_nblocksy*st->base.stride + 1023)/1024); + if (0) { + debug_printf("%s: failed to allocate %u KB of DMA, " + "splitting into %u x %u KB DMA transfers\n", + __FUNCTION__, + (nblocksy*st->base.stride + 1023)/1024, + (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, + (st->hw_nblocksy*st->base.stride + 1023)/1024); + } + st->swbuf = MALLOC(nblocksy*st->base.stride); if(!st->swbuf) goto no_swbuf; } - if (usage & PIPE_TRANSFER_READ) - svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM); + if (usage & PIPE_TRANSFER_READ) { + SVGA3dSurfaceDMAFlags flags; + memset(&flags, 0, sizeof flags); + svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM, flags); + } return &st->base; @@ -460,7 +447,17 @@ svga_texture_transfer_destroy(struct pipe_context *pipe, struct svga_transfer *st = svga_transfer(transfer); if (st->base.usage & PIPE_TRANSFER_WRITE) { - svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM); + SVGA3dSurfaceDMAFlags flags; + + memset(&flags, 0, sizeof flags); + if (transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + flags.discard = TRUE; + } + if (transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) { + flags.unsynchronized = TRUE; + } + + svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM, flags); ss->texture_timestamp++; tex->view_age[transfer->level] = ++(tex->age); if (transfer->resource->target == PIPE_TEXTURE_CUBE) @@ -483,7 +480,6 @@ struct u_resource_vtbl svga_texture_vtbl = { svga_texture_get_handle, /* get_handle */ 
svga_texture_destroy, /* resource_destroy */ - svga_texture_is_referenced, /* is_resource_referenced */ svga_texture_get_transfer, /* get_transfer */ svga_texture_transfer_destroy, /* transfer_destroy */ svga_texture_transfer_map, /* transfer_map */ @@ -527,7 +523,8 @@ svga_texture_create(struct pipe_screen *screen, tex->key.numFaces = 1; } - tex->key.cachable = 1; + /* XXX: Disabled for now */ + tex->key.cachable = 0; if (template->bind & PIPE_BIND_SAMPLER_VIEW) tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE; @@ -571,6 +568,9 @@ svga_texture_create(struct pipe_screen *screen, if (tex->handle) SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle); + debug_reference(&tex->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); + return &tex->b.b; error2: diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c index 6911f13f77..4f1f4b597e 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.c +++ b/src/gallium/drivers/svga/svga_sampler_view.c @@ -32,6 +32,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_string.h" #include "svga_screen.h" #include "svga_context.h" @@ -41,14 +42,24 @@ #include "svga_surface.h" +void +svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv) +{ + char res[128]; + debug_describe_resource(res, sv->texture); + util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod); +} + struct svga_sampler_view * svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_resource *pt, unsigned min_lod, unsigned max_lod) { - struct svga_screen *ss = svga_screen(pt->screen); + struct svga_context *svga = svga_context(pipe); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_texture *tex = svga_texture(pt); struct svga_sampler_view *sv = NULL; + SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE; SVGA3dSurfaceFormat format = 
svga_translate_format(pt->format); boolean view = TRUE; @@ -68,10 +79,6 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, if (min_lod == 0 && max_lod >= pt->last_level) view = FALSE; - if (util_format_is_s3tc(pt->format) && view) { - format = svga_translate_format_render(pt->format); - } - if (ss->debug.no_sampler_view) view = FALSE; @@ -113,6 +120,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pt->last_level); sv->key.cachable = 0; sv->handle = tex->handle; + debug_reference(&sv->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); return sv; } @@ -126,7 +135,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pt->last_level); sv->age = tex->age; - sv->handle = svga_texture_view_surface(pipe, tex, format, + sv->handle = svga_texture_view_surface(svga, tex, flags, format, min_lod, max_lod - min_lod + 1, -1, -1, @@ -136,6 +145,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, assert(0); sv->key.cachable = 0; sv->handle = tex->handle; + debug_reference(&sv->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); return sv; } @@ -143,6 +154,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, svga_sampler_view_reference(&tex->cached_view, sv); pipe_mutex_unlock(ss->tex_mutex); + debug_reference(&sv->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); + return sv; } diff --git a/src/gallium/drivers/svga/svga_sampler_view.h b/src/gallium/drivers/svga/svga_sampler_view.h index e64665f2e5..2087c1be85 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.h +++ b/src/gallium/drivers/svga/svga_sampler_view.h @@ -83,12 +83,16 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * void svga_destroy_sampler_view_priv(struct svga_sampler_view *v); +void +svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv); + static INLINE void svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view 
*v) { struct svga_sampler_view *old = *ptr; - if (pipe_reference(&(*ptr)->reference, &v->reference)) + if (pipe_reference_described(&(*ptr)->reference, &v->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view)) svga_destroy_sampler_view_priv(old); *ptr = v; } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 078190342a..6c987abe05 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -35,7 +35,6 @@ #include "svga_resource_texture.h" #include "svga_resource.h" #include "svga_debug.h" -#include "svga_surface.h" #include "svga3d_shaderdefs.h" @@ -226,13 +225,18 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return svgascreen->use_ps30 ? 32 : 12; return result.u; case PIPE_SHADER_CAP_MAX_ADDRS: - return svgascreen->use_ps30 ? 1 : 0; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + /* + * Although PS 3.0 has some addressing abilities it can only represent + * loops that can be statically determined and unrolled. Given we can + * only handle a subset of the cases that the state tracker already + * does it is better to defer loop unrolling to the state tracker. + */ + return 0; case PIPE_SHADER_CAP_MAX_PREDS: return svgascreen->use_ps30 ? 1 : 0; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; - case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - return svgascreen->use_ps30 ? 
1 : 0; case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: @@ -338,8 +342,7 @@ svga_is_format_supported( struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags ) + unsigned tex_usage) { struct svga_winsys_screen *sws = svga_screen(screen)->sws; SVGA3dDevCapIndex index; @@ -361,13 +364,6 @@ svga_is_format_supported( struct pipe_screen *screen, case PIPE_FORMAT_B5G5R5A1_UNORM: return FALSE; - /* Simulate ability to render into compressed textures */ - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return TRUE; - default: break; } @@ -415,27 +411,26 @@ svga_fence_reference(struct pipe_screen *screen, } -static int +static boolean svga_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) + struct pipe_fence_handle *fence) { struct svga_winsys_screen *sws = svga_screen(screen)->sws; - return sws->fence_signalled(sws, fence, flag); + return sws->fence_signalled(sws, fence, 0) == 0; } -static int +static boolean svga_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, - unsigned flag) + uint64_t timeout) { struct svga_winsys_screen *sws = svga_screen(screen)->sws; SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n", __FUNCTION__, fence); - return sws->fence_finish(sws, fence, flag); + return sws->fence_finish(sws, fence, 0) == 0; } @@ -501,6 +496,12 @@ svga_screen_create(struct svga_winsys_screen *sws) svga_init_screen_resource_functions(svgascreen); + if (sws->get_hw_version) { + svgascreen->hw_version = sws->get_hw_version(sws); + } else { + svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1; + } + svgascreen->use_ps30 = sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) && result.u >= SVGA3DPSVERSION_30 ? 
TRUE : FALSE; diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h index 86ec89d88c..7ef627f928 100644 --- a/src/gallium/drivers/svga/svga_screen.h +++ b/src/gallium/drivers/svga/svga_screen.h @@ -49,6 +49,8 @@ struct svga_screen struct pipe_screen screen; struct svga_winsys_screen *sws; + SVGA3dHardwareVersion hw_version; + unsigned use_ps30; unsigned use_vs30; diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h index 22d5a6d552..7f239e7a32 100644 --- a/src/gallium/drivers/svga/svga_state.h +++ b/src/gallium/drivers/svga/svga_state.h @@ -92,4 +92,8 @@ void svga_update_state_retry( struct svga_context *svga, enum pipe_error svga_emit_initial_state( struct svga_context *svga ); +enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga ); + +enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga ); + #endif diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index 97c818cd37..6c3275e74c 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -40,9 +40,12 @@ /* Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_* */ -static int svga_shader_type( int unit ) +static int svga_shader_type( int shader ) { - return unit + 1; + assert(PIPE_SHADER_VERTEX + 1 == SVGA3D_SHADERTYPE_VS); + assert(PIPE_SHADER_FRAGMENT + 1 == SVGA3D_SHADERTYPE_PS); + assert(shader <= PIPE_SHADER_FRAGMENT); + return shader + 1; } @@ -110,7 +113,7 @@ static int emit_consts( struct svga_context *svga, done: if (data) - pipe_buffer_unmap(&svga->pipe, svga->curr.cb[unit], transfer); + pipe_buffer_unmap(&svga->pipe, transfer); return ret; } diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c index fcbb35e797..cdadb20c17 100644 --- a/src/gallium/drivers/svga/svga_state_framebuffer.c +++ 
b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -93,6 +93,55 @@ static int emit_framebuffer( struct svga_context *svga, } +/* + * Rebind rendertargets. + * + * Similar to emit_framebuffer, but without any state checking/update. + * + * Called at the beginning of every new command buffer to ensure that + * non-dirty rendertargets are properly paged-in. + */ +enum pipe_error +svga_reemit_framebuffer_bindings(struct svga_context *svga) +{ + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + unsigned i; + enum pipe_error ret; + + for (i = 0; i < MIN2(PIPE_MAX_COLOR_BUFS, 8); ++i) { + if (hw->cbufs[i]) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, hw->cbufs[i]); + if (ret != PIPE_OK) { + return ret; + } + } + } + + if (hw->zsbuf) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, hw->zsbuf); + if (ret != PIPE_OK) { + return ret; + } + + if (hw->zsbuf && + hw->zsbuf->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf); + if (ret != PIPE_OK) { + return ret; + } + } + else { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, NULL); + if (ret != PIPE_OK) { + return ret; + } + } + } + + return PIPE_OK; +} + + struct svga_tracked_state svga_hw_framebuffer = { "hw framebuffer state", diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index ad6f294713..9c04adec8e 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -136,7 +136,7 @@ static int make_fs_key( const struct svga_context *svga, /* The blend workaround for simulating logicop xor behaviour * requires that the incoming fragment color be white. 
This change - * achieves that by creating a varient of the current fragment + * achieves that by creating a variant of the current fragment * shader that overrides all output colors with 1,1,1,1 * * This will work for most shaders, including those containing diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index 66fea02a4b..68c0257878 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -35,6 +35,11 @@ /*********************************************************************** */ + +/** + * Given a gallium vertex element format, return the corresponding SVGA3D + * format. Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats. + */ static INLINE SVGA3dDeclType svga_translate_vertex_format(enum pipe_format format) { @@ -80,6 +85,7 @@ static int update_need_swvfetch( struct svga_context *svga, for (i = 0; i < svga->curr.velems->count; i++) { svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.velems->velem[i].src_format); if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) { + /* Unsupported format - use software fetch */ need_swvfetch = TRUE; break; } @@ -118,6 +124,11 @@ static int update_need_pipeline( struct svga_context *svga, __FUNCTION__, svga->curr.rast->need_pipeline, (1 << svga->curr.reduced_prim) ); + SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline tris (%s), lines (%s), points (%s)\n", + __FUNCTION__, + svga->curr.rast->need_pipeline_tris_str, + svga->curr.rast->need_pipeline_lines_str, + svga->curr.rast->need_pipeline_points_str); need_pipeline = TRUE; } @@ -140,6 +151,10 @@ static int update_need_pipeline( struct svga_context *svga, svga->dirty |= SVGA_NEW_NEED_PIPELINE; } + /* DEBUG */ + if (0 && svga->state.sw.need_pipeline) + debug_printf("sw.need_pipeline = %d\n", svga->state.sw.need_pipeline); + return 0; } @@ -164,20 +179,28 @@ static int update_need_swtnl( struct svga_context *svga, boolean 
need_swtnl; if (svga->debug.no_swtnl) { - svga->state.sw.need_swvfetch = 0; - svga->state.sw.need_pipeline = 0; + svga->state.sw.need_swvfetch = FALSE; + svga->state.sw.need_pipeline = FALSE; } need_swtnl = (svga->state.sw.need_swvfetch || svga->state.sw.need_pipeline); if (svga->debug.force_swtnl) { - need_swtnl = 1; + need_swtnl = TRUE; } + /* + * Some state changes the draw module does make us believe we + * don't need swtnl. This causes the vdecl code to pick up + * the wrong buffers and vertex formats. Try trivial/line-wide. + */ + if (svga->state.sw.in_swtnl_draw) + need_swtnl = TRUE; + if (need_swtnl != svga->state.sw.need_swtnl) { SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF, - "%s need_swvfetch: %s, need_pipeline %s\n", + "%s: need_swvfetch %s, need_pipeline %s\n", __FUNCTION__, svga->state.sw.need_swvfetch ? "true" : "false", svga->state.sw.need_pipeline ? "true" : "false"); diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c index f8b269a101..c502506b93 100644 --- a/src/gallium/drivers/svga/svga_state_tss.c +++ b/src/gallium/drivers/svga/svga_state_tss.c @@ -52,6 +52,16 @@ void svga_cleanup_tss_binding(struct svga_context *svga) } +struct bind_queue { + struct { + unsigned unit; + struct svga_hw_view_state *view; + } bind[PIPE_MAX_SAMPLERS]; + + unsigned bind_count; +}; + + static int update_tss_binding(struct svga_context *svga, unsigned dirty ) @@ -63,15 +73,7 @@ update_tss_binding(struct svga_context *svga, unsigned min_lod; unsigned max_lod; - - struct { - struct { - unsigned unit; - struct svga_hw_view_state *view; - } bind[PIPE_MAX_SAMPLERS]; - - unsigned bind_count; - } queue; + struct bind_queue queue; queue.bind_count = 0; @@ -164,6 +166,64 @@ fail: } +/* + * Rebind textures. + * + * Similar to update_tss_binding, but without any state checking/update. + * + * Called at the beginning of every new command buffer to ensure that + * non-dirty textures are properly paged-in. 
+ */ +enum pipe_error +svga_reemit_tss_bindings(struct svga_context *svga) +{ + unsigned i; + enum pipe_error ret; + struct bind_queue queue; + + queue.bind_count = 0; + + for (i = 0; i < svga->state.hw_draw.num_views; i++) { + struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; + + if (view->v) { + queue.bind[queue.bind_count].unit = i; + queue.bind[queue.bind_count].view = view; + queue.bind_count++; + } + } + + if (queue.bind_count) { + SVGA3dTextureState *ts; + + ret = SVGA3D_BeginSetTextureState(svga->swc, + &ts, + queue.bind_count); + if (ret != PIPE_OK) { + return ret; + } + + for (i = 0; i < queue.bind_count; i++) { + struct svga_winsys_surface *handle; + + ts[i].stage = queue.bind[i].unit; + ts[i].name = SVGA3D_TS_BIND_TEXTURE; + + assert(queue.bind[i].view->v); + handle = queue.bind[i].view->v->handle; + svga->swc->surface_relocation(svga->swc, + &ts[i].value, + handle, + SVGA_RELOC_READ); + } + + SVGA_FIFOCommitAll(svga->swc); + } + + return PIPE_OK; +} + + struct svga_tracked_state svga_hw_tss_binding = { "texture binding emit", SVGA_NEW_TEXTURE_BINDING | diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index 3af7bf2b35..2f85f9488f 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -57,12 +57,14 @@ upload_user_buffers( struct svga_context *svga ) struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer); if (!buffer->uploaded.buffer) { + boolean flushed; ret = u_upload_buffer( svga->upload_vb, - 0, + 0, 0, buffer->b.b.width0, &buffer->b.b, &buffer->uploaded.offset, - &buffer->uploaded.buffer ); + &buffer->uploaded.buffer, + &flushed); if (ret) return ret; @@ -76,7 +78,6 @@ upload_user_buffers( struct svga_context *svga ) buffer->b.b.width0); } - pipe_resource_reference( &svga->curr.vb[i].buffer, buffer->uploaded.buffer ); svga->curr.vb[i].buffer_offset = buffer->uploaded.offset; } } @@ -108,6 +109,7 @@ static int 
emit_hw_vs_vdecl( struct svga_context *svga, for (i = 0; i < svga->curr.velems->count; i++) { const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; unsigned usage, index; + struct svga_buffer *buffer = svga_buffer(vb->buffer); svga_generate_vdecl_semantics( i, &usage, &index ); @@ -125,6 +127,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, svga_hwtnl_vdecl( svga->hwtnl, i, &decl, + buffer->uploaded.buffer ? buffer->uploaded.buffer : vb->buffer ); } diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 5133c70593..ae9a20ebb8 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -229,13 +229,11 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, - vbuffer->stride, vbuffer->max_index); + vbuffer->stride, ~0); translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); - pipe_buffer_unmap(&svga->pipe, - vbuffer->buffer, - transfer); + pipe_buffer_unmap(&svga->pipe, transfer); translate->release(translate); } diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c index 3e4bed76c0..3e8fb5f027 100644 --- a/src/gallium/drivers/svga/svga_surface.c +++ b/src/gallium/drivers/svga/svga_surface.c @@ -100,8 +100,9 @@ svga_texture_copy_handle(struct svga_context *svga, struct svga_winsys_surface * -svga_texture_view_surface(struct pipe_context *pipe, +svga_texture_view_surface(struct svga_context *svga, struct svga_texture *tex, + SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, unsigned start_mip, unsigned num_mip, @@ -109,7 +110,7 @@ svga_texture_view_surface(struct pipe_context *pipe, int zslice_pick, struct svga_host_surface_cache_key *key) /* OUT */ { - struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_screen *ss = svga_screen(svga->pipe.screen); struct svga_winsys_surface *handle; 
uint32_t i, j; unsigned z_offset = 0; @@ -118,7 +119,7 @@ svga_texture_view_surface(struct pipe_context *pipe, "svga: Create surface view: face %d zslice %d mips %d..%d\n", face_pick, zslice_pick, start_mip, start_mip+num_mip-1); - key->flags = 0; + key->flags = flags; key->format = format; key->numMipLevels = num_mip; key->size.width = u_minify(tex->b.b.width0, start_mip); @@ -161,7 +162,7 @@ svga_texture_view_surface(struct pipe_context *pipe, u_minify(tex->b.b.depth0, i + start_mip) : 1); - svga_texture_copy_handle(svga_context(pipe), + svga_texture_copy_handle(svga, tex->handle, 0, 0, z_offset, i + start_mip, @@ -183,6 +184,7 @@ svga_create_surface(struct pipe_context *pipe, struct pipe_resource *pt, const struct pipe_surface *surf_tmpl) { + struct svga_context *svga = svga_context(pipe); struct svga_texture *tex = svga_texture(pt); struct pipe_screen *screen = pipe->screen; struct svga_surface *s; @@ -191,6 +193,7 @@ svga_create_surface(struct pipe_context *pipe, boolean render = (surf_tmpl->usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) ? 
TRUE : FALSE; boolean view = FALSE; + SVGA3dSurfaceFlags flags; SVGA3dSurfaceFormat format; assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); @@ -219,10 +222,18 @@ svga_create_surface(struct pipe_context *pipe, s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; - if (!render) + if (!render) { + flags = SVGA3D_SURFACE_HINT_TEXTURE; format = svga_translate_format(surf_tmpl->format); - else + } else { + if (surf_tmpl->usage & PIPE_BIND_RENDER_TARGET) { + flags = SVGA3D_SURFACE_HINT_RENDERTARGET; + } + if (surf_tmpl->usage & PIPE_BIND_DEPTH_STENCIL) { + flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL; + } format = svga_translate_format_render(surf_tmpl->format); + } assert(format != SVGA3D_FORMAT_INVALID); @@ -249,7 +260,8 @@ svga_create_surface(struct pipe_context *pipe, SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n", pt, surf_tmpl->u.tex.level, face, zslice, s); - s->handle = svga_texture_view_surface(NULL, tex, format, surf_tmpl->u.tex.level, + s->handle = svga_texture_view_surface(svga, tex, flags, format, + surf_tmpl->u.tex.level, 1, face, zslice, &s->key); s->real_face = 0; s->real_level = 0; @@ -329,7 +341,7 @@ void svga_mark_surfaces_dirty(struct svga_context *svga) * pipe is optional context to inline the blit command in. 
*/ void -svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) +svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf) { struct svga_surface *s = svga_surface(surf); struct svga_texture *tex = svga_texture(surf->texture); @@ -354,7 +366,7 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) if (s->handle != tex->handle) { SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->u.tex.level, surf); - svga_texture_copy_handle(svga_context(pipe), + svga_texture_copy_handle(svga, s->handle, 0, 0, 0, s->real_level, s->real_face, tex->handle, 0, 0, zslice, surf->u.tex.level, face, u_minify(tex->b.b.width0, surf->u.tex.level), diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h index afb8326e1f..bffc8c22c6 100644 --- a/src/gallium/drivers/svga/svga_surface.h +++ b/src/gallium/drivers/svga/svga_surface.h @@ -56,14 +56,15 @@ struct svga_surface extern void -svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf); +svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf); extern boolean svga_surface_needs_propagation(struct pipe_surface *surf); struct svga_winsys_surface * -svga_texture_view_surface(struct pipe_context *pipe, +svga_texture_view_surface(struct svga_context *svga, struct svga_texture *tex, + SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, unsigned start_mip, unsigned num_mip, diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c index ff3da84272..ac9d637f8c 100644 --- a/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -87,11 +87,14 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render, svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size); svga_render->vbuf = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, 
svga_render->vbuf_size); if(!svga_render->vbuf) { svga_context_flush(svga, NULL); + assert(!svga_render->vbuf); svga_render->vbuf = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, svga_render->vbuf_size); assert(svga_render->vbuf); } @@ -141,7 +144,7 @@ svga_vbuf_render_unmap_vertices( struct vbuf_render *render, pipe_buffer_flush_mapped_range(&svga->pipe, svga_render->vbuf_transfer, offset, length); - pipe_buffer_unmap(&svga->pipe, svga_render->vbuf, svga_render->vbuf_transfer); + pipe_buffer_unmap(&svga->pipe, svga_render->vbuf_transfer); svga_render->min_index = min_index; svga_render->max_index = max_index; svga_render->vbuf_used = MAX2(svga_render->vbuf_used, used); @@ -158,7 +161,7 @@ svga_vbuf_render_set_primitive( struct vbuf_render *render, } static void -svga_vbuf_sumbit_state( struct svga_vbuf_render *svga_render ) +svga_vbuf_submit_state( struct svga_vbuf_render *svga_render ) { struct svga_context *svga = svga_render->svga; SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; @@ -221,7 +224,8 @@ svga_vbuf_render_draw_arrays( struct vbuf_render *render, unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; enum pipe_error ret = 0; - svga_vbuf_sumbit_state(svga_render); + /* off to hardware */ + svga_vbuf_submit_state(svga_render); /* Need to call update_state() again as the draw module may have * altered some of our state behind our backs. 
Testcase: @@ -260,6 +264,7 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size); svga_render->ibuf = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STREAM, svga_render->ibuf_size); svga_render->ibuf_offset = 0; } @@ -267,9 +272,8 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, pipe_buffer_write_nooverlap(&svga->pipe, svga_render->ibuf, svga_render->ibuf_offset, 2 * nr_indices, indices); - /* off to hardware */ - svga_vbuf_sumbit_state(svga_render); + svga_vbuf_submit_state(svga_render); /* Need to call update_state() again as the draw module may have * altered some of our state behind our backs. Testcase: diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 814e8edd70..ad29c1b642 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -51,6 +51,9 @@ svga_swtnl_draw_vbo(struct svga_context *svga, assert(svga->state.sw.need_swtnl); assert(draw); + /* Make sure that the need_swtnl flag does not go away */ + svga->state.sw.in_swtnl_draw = TRUE; + ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW); if (ret) { svga_context_flush(svga, NULL); @@ -106,22 +109,23 @@ svga_swtnl_draw_vbo(struct svga_context *svga, * unmap vertex/index buffers */ for (i = 0; i < svga->curr.num_vertex_buffers; i++) { - pipe_buffer_unmap(&svga->pipe, svga->curr.vb[i].buffer, - vb_transfer[i]); + pipe_buffer_unmap(&svga->pipe, vb_transfer[i]); draw_set_mapped_vertex_buffer(draw, i, NULL); } if (ib_transfer) { - pipe_buffer_unmap(&svga->pipe, svga->curr.ib.buffer, ib_transfer); + pipe_buffer_unmap(&svga->pipe, ib_transfer); draw_set_mapped_index_buffer(draw, NULL); } if (svga->curr.cb[PIPE_SHADER_VERTEX]) { - pipe_buffer_unmap(&svga->pipe, - svga->curr.cb[PIPE_SHADER_VERTEX], - cb_transfer); + pipe_buffer_unmap(&svga->pipe, cb_transfer); } + /* Now safe to remove the need_swtnl flag in 
any update_state call */ + svga->state.sw.in_swtnl_draw = FALSE; + svga->dirty |= SVGA_NEW_NEED_PIPELINE | SVGA_NEW_NEED_SWVFETCH; + return ret; } diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index a759238293..efda2f605b 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -61,7 +61,7 @@ static void set_draw_viewport( struct svga_context *svga ) * going to be drawn with triangles, but we're not catching all * cases where that will happen. */ - if (svga->curr.rast->templ.line_width > 1.0) + if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES) { adjx = SVGA_LINE_ADJ_X + 0.175; adjy = SVGA_LINE_ADJ_Y - 0.175; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index f2591c5721..99600cf5c0 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -57,7 +57,6 @@ translate_opcode( case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM; - case TGSI_OPCODE_SSG: return SVGA3DOP_SGN; default: debug_printf("Unkown opcode %u\n", opcode); assert( 0 ); @@ -285,6 +284,41 @@ static void reset_temp_regs( struct svga_shader_emitter *emit ) } +/* Replace the src with the temporary specified in the dst, but copying + * only the necessary channels, and preserving the original swizzle (which is + * important given that several opcodes have constraints in the allowed + * swizzles). 
+ */ +static boolean emit_repl( struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dst, + struct src_register *src0) +{ + unsigned src0_swizzle; + unsigned chan; + + assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP); + + src0_swizzle = src0->base.swizzle; + + dst.mask = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3; + dst.mask |= 1 << swizzle; + } + assert(dst.mask); + + src0->base.swizzle = SVGA3DSWIZZLE_NONE; + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 )) + return FALSE; + + *src0 = src( dst ); + src0->base.swizzle = src0_swizzle; + + return TRUE; +} + + static boolean submit_op0( struct svga_shader_emitter *emit, SVGA3dShaderInstToken inst, SVGA3dShaderDestToken dest ) @@ -333,14 +367,11 @@ static boolean submit_op2( struct svga_shader_emitter *emit, src0.base.num != src1.base.num) need_temp = TRUE; - if (need_temp) - { + if (need_temp) { temp = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 )) + if (!emit_repl( emit, temp, &src0 )) return FALSE; - - src0 = src( temp ); } if (!emit_op2( emit, inst, dest, src0, src1 )) @@ -396,24 +427,18 @@ static boolean submit_op3( struct svga_shader_emitter *emit, (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num)) need_temp1 = TRUE; - if (need_temp0) - { + if (need_temp0) { temp0 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + if (!emit_repl( emit, temp0, &src0 )) return FALSE; - - src0 = src( temp0 ); } - if (need_temp1) - { + if (need_temp1) { temp1 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 )) + if (!emit_repl( emit, temp1, &src1 )) return FALSE; - - src1 = src( temp1 ); } if (!emit_op3( emit, inst, dest, src0, src1, src2 )) @@ -478,24 +503,18 @@ static boolean submit_op4( struct svga_shader_emitter *emit, (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num)) need_temp3 = TRUE; - if (need_temp0) - { + if 
(need_temp0) { temp0 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + if (!emit_repl( emit, temp0, &src0 )) return FALSE; - - src0 = src( temp0 ); } - if (need_temp3) - { + if (need_temp3) { temp3 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 )) + if (!emit_repl( emit, temp3, &src3 )) return FALSE; - - src3 = src( temp3 ); } if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 )) @@ -509,6 +528,55 @@ static boolean submit_op4( struct svga_shader_emitter *emit, } +static boolean alias_src_dst( struct src_register src, + SVGA3dShaderDestToken dst ) +{ + if (src.base.num != dst.num) + return FALSE; + + if (SVGA3dShaderGetRegType(dst.value) != + SVGA3dShaderGetRegType(src.base.value)) + return FALSE; + + return TRUE; +} + + +static boolean submit_lrp(struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dst, + struct src_register src0, + struct src_register src1, + struct src_register src2) +{ + SVGA3dShaderDestToken tmp; + boolean need_dst_tmp = FALSE; + + /* The dst reg must be a temporary, and not be the same as src0 or src2 */ + if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || + alias_src_dst(src0, dst) || + alias_src_dst(src2, dst)) + need_dst_tmp = TRUE; + + if (need_dst_tmp) { + tmp = get_temp( emit ); + tmp.mask = dst.mask; + } + else { + tmp = dst; + } + + if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) + return FALSE; + + if (need_dst_tmp) { + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) + return FALSE; + } + + return TRUE; +} + + static boolean emit_def_const( struct svga_shader_emitter *emit, SVGA3dShaderConstType type, unsigned idx, @@ -747,7 +815,7 @@ static boolean emit_fake_arl(struct svga_shader_emitter *emit, static boolean emit_if(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { - const struct src_register src = translate_src_register( + struct src_register src0 = 
translate_src_register( emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); @@ -755,10 +823,23 @@ static boolean emit_if(struct svga_shader_emitter *emit, if_token.control = SVGA3DOPCOMPC_NE; zero = scalar(zero, TGSI_SWIZZLE_X); + if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) { + /* + * Max different constant registers readable per IFC instruction is 1. + */ + + SVGA3dShaderDestToken tmp = get_temp( emit ); + + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) + return FALSE; + + src0 = scalar(src( tmp ), TGSI_SWIZZLE_X); + } + emit->dynamic_branching_level++; return (emit_instruction( emit, if_token ) && - emit_src( emit, src ) && + emit_src( emit, src0 ) && emit_src( emit, zero ) ); } @@ -832,7 +913,7 @@ static boolean emit_cmp(struct svga_shader_emitter *emit, */ if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero)) return FALSE; - return submit_op3(emit, inst_token(SVGA3DOP_LRP), dst, src(temp), src1, src2); + return submit_lrp(emit, dst, src(temp), src1, src2); } /* CMP DST, SRC0, SRC2, SRC1 */ @@ -1066,6 +1147,41 @@ static boolean emit_cos(struct svga_shader_emitter *emit, return TRUE; } +static boolean emit_ssg(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + SVGA3dShaderDestToken temp0 = get_temp( emit ); + SVGA3dShaderDestToken temp1 = get_temp( emit ); + struct src_register zero, one; + + if (emit->unit == PIPE_SHADER_VERTEX) { + /* SGN DST, SRC0, TMP0, TMP1 */ + return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0, + src( temp0 ), src( temp1 ) ); + } + + zero = get_zero_immediate( emit ); + one = scalar( zero, TGSI_SWIZZLE_W ); + zero = scalar( zero, TGSI_SWIZZLE_X ); + + /* CMP TMP0, SRC0, one, zero */ + if (!submit_op3( emit, 
inst_token( SVGA3DOP_CMP ), + writemask( temp0, dst.mask ), src0, one, zero )) + return FALSE; + + /* CMP TMP1, negate(SRC0), negate(one), zero */ + if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), + writemask( temp1, dst.mask ), negate( src0 ), negate( one ), + zero )) + return FALSE; + + /* ADD DST, TMP0, TMP1 */ + return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ), + src( temp1 ) ); +} /* * ADD DST SRC0, negate(SRC0) @@ -1588,6 +1704,10 @@ static boolean emit_deriv(struct svga_shader_emitter *emit, } else { unsigned opcode; + const struct tgsi_full_src_register *reg = &insn->Src[0]; + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dst; + struct src_register src0; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_DDX: @@ -1600,7 +1720,21 @@ static boolean emit_deriv(struct svga_shader_emitter *emit, return FALSE; } - return emit_simple_instruction( emit, opcode, insn ); + inst = inst_token( opcode ); + dst = translate_dst_register( emit, insn, 0 ); + src0 = translate_src_register( emit, reg ); + + /* We cannot use negate or abs on source to dsx/dsy instruction. 
+ */ + if (reg->Register.Absolute || + reg->Register.Negate) { + SVGA3dShaderDestToken temp = get_temp( emit ); + + if (!emit_repl( emit, temp, &src0 )) + return FALSE; + } + + return submit_op1( emit, inst, dst, src0 ); } } @@ -1624,19 +1758,6 @@ static boolean emit_arl(struct svga_shader_emitter *emit, } } -static boolean alias_src_dst( struct src_register src, - SVGA3dShaderDestToken dst ) -{ - if (src.base.num != dst.num) - return FALSE; - - if (SVGA3dShaderGetRegType(dst.value) != - SVGA3dShaderGetRegType(src.base.value)) - return FALSE; - - return TRUE; -} - static boolean emit_pow(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { @@ -1729,37 +1850,14 @@ static boolean emit_lrp(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - SVGA3dShaderDestToken tmp; const struct src_register src0 = translate_src_register( emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( emit, &insn->Src[1] ); const struct src_register src2 = translate_src_register( emit, &insn->Src[2] ); - boolean need_dst_tmp = FALSE; - - /* The dst reg must not be the same as src0 or src2 */ - if (alias_src_dst(src0, dst) || - alias_src_dst(src2, dst)) - need_dst_tmp = TRUE; - if (need_dst_tmp) { - tmp = get_temp( emit ); - tmp.mask = dst.mask; - } - else { - tmp = dst; - } - - if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) - return FALSE; - - if (need_dst_tmp) { - if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) - return FALSE; - } - - return TRUE; + return submit_lrp(emit, dst, src0, src1, src2); } @@ -2366,6 +2464,9 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_LRP: return emit_lrp( emit, insn ); + case TGSI_OPCODE_SSG: + return emit_ssg( emit, insn ); + default: { unsigned opcode = translate_opcode(insn->Instruction.Opcode); @@ -2715,6 +2816,7 @@ 
needs_to_create_zero( struct svga_shader_emitter *emit ) return TRUE; if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 || emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1) return TRUE; } diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index 5e4bdeff2e..ae61cea083 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -136,6 +136,9 @@ struct svga_winsys_screen void (*destroy)(struct svga_winsys_screen *sws); + SVGA3dHardwareVersion + (*get_hw_version)(struct svga_winsys_screen *sws); + boolean (*get_cap)(struct svga_winsys_screen *sws, SVGA3dDevCapIndex index, @@ -243,12 +246,12 @@ struct svga_winsys_screen /** * Map the entire data store of a buffer object into the client's address. - * flags is a bitmaks of PIPE_TRANSFER_* + * usage is a bitmask of PIPE_TRANSFER_* */ void * (*buffer_map)( struct svga_winsys_screen *sws, struct svga_winsys_buffer *buf, - unsigned flags ); + unsigned usage ); void (*buffer_unmap)( struct svga_winsys_screen *sws, diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c index 95612a8006..ad1549d9f8 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_op.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -136,7 +136,7 @@ static struct sh_opcode_info opcode_info[] = { "dsy", 1, 1, 0, 0, SVGA3DOP_INVALID, }, { "texldd", 1, 4, 0, 0, SVGA3DOP_INVALID, }, { "setp", 1, 2, 0, 0, SVGA3DOP_SETP, }, - { "texldl", 1, 2, 0, 0, SVGA3DOP_INVALID, }, + { "texldl", 1, 2, 0, 0, SVGA3DOP_TEXLDL, }, { "breakp", 0, 1, 0, 0, SVGA3DOP_INVALID, }, }; @@ -156,6 +156,8 @@ const struct sh_opcode_info *svga_opcode_info( uint op ) if (info->svga_opcode == SVGA3DOP_INVALID) { /* No valid information. Please provide number of dst/src registers. 
*/ + _debug_printf("Missing information for opcode %u, '%s'\n", op, + opcode_info[op].mnemonic); assert( 0 ); return NULL; } diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index eaabae8ce4..4db7619c42 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1184,7 +1184,6 @@ trace_context_clear_depth_stencil(struct pipe_context *_pipe, static INLINE void trace_context_flush(struct pipe_context *_pipe, - unsigned flags, struct pipe_fence_handle **fence) { struct trace_context *tr_ctx = trace_context(_pipe); @@ -1193,9 +1192,8 @@ trace_context_flush(struct pipe_context *_pipe, trace_dump_call_begin("pipe_context", "flush"); trace_dump_arg(ptr, pipe); - trace_dump_arg(uint, flags); - pipe->flush(pipe, flags, fence); + pipe->flush(pipe, fence); if(fence) trace_dump_ret(ptr, *fence); @@ -1219,31 +1217,6 @@ trace_context_destroy(struct pipe_context *_pipe) FREE(tr_ctx); } -static unsigned int -trace_is_resource_referenced( struct pipe_context *_pipe, - struct pipe_resource *_resource, - unsigned level, int layer) -{ - struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_resource *tr_tex = trace_resource(_resource); - struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_resource *texture = tr_tex->resource; - unsigned int referenced; - - trace_dump_call_begin("pipe_context", "is_resource_referenced"); - trace_dump_arg(ptr, pipe); - trace_dump_arg(ptr, texture); - trace_dump_arg(uint, level); - trace_dump_arg(int, layer); - - referenced = pipe->is_resource_referenced(pipe, texture, level, layer); - - trace_dump_ret(uint, referenced); - trace_dump_call_end(); - - return referenced; -} - /******************************************************************** * transfer @@ -1419,6 +1392,28 @@ trace_context_transfer_inline_write(struct pipe_context *_context, } +static void trace_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + 
unsigned offset, unsigned size) +{ + struct trace_context *tr_context = trace_context(_context); + struct trace_resource *tr_tex = trace_resource(_resource); + struct pipe_context *context = tr_context->pipe; + struct pipe_resource *resource = tr_tex->resource; + + assert(resource->screen == context->screen); + + trace_dump_call_begin("pipe_context", "redefine_user_buffer"); + + trace_dump_arg(ptr, context); + trace_dump_arg(ptr, resource); + trace_dump_arg(uint, offset); + trace_dump_arg(uint, size); + + trace_dump_call_end(); + + context->redefine_user_buffer(context, resource, offset, size); +} static const struct debug_named_value rbug_blocker_flags[] = { @@ -1498,7 +1493,6 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.clear_render_target = trace_context_clear_render_target; tr_ctx->base.clear_depth_stencil = trace_context_clear_depth_stencil; tr_ctx->base.flush = trace_context_flush; - tr_ctx->base.is_resource_referenced = trace_is_resource_referenced; tr_ctx->base.get_transfer = trace_context_get_transfer; tr_ctx->base.transfer_destroy = trace_context_transfer_destroy; @@ -1506,6 +1500,7 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.transfer_unmap = trace_context_transfer_unmap; tr_ctx->base.transfer_flush_region = trace_context_transfer_flush_region; tr_ctx->base.transfer_inline_write = trace_context_transfer_inline_write; + tr_ctx->base.redefine_user_buffer = trace_redefine_user_buffer; tr_ctx->pipe = pipe; diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 51a4ea9633..8a4ec20fb8 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -60,10 +60,9 @@ static struct os_stream *stream = NULL; static unsigned refcount = 0; -static pipe_mutex call_mutex; +pipe_static_mutex(call_mutex); static long unsigned call_no = 0; static boolean dumping = FALSE; -static boolean initialized = FALSE; static INLINE void @@ -225,26 +224,13 @@ 
trace_dump_trace_close(void) stream = NULL; refcount = 0; call_no = 0; - pipe_mutex_destroy(call_mutex); } } -void trace_dump_init() -{ - if (initialized) - return; - - pipe_mutex_init(call_mutex); - dumping = FALSE; - initialized = TRUE; -} - boolean trace_dump_trace_begin() { const char *filename; - assert(initialized); - filename = debug_get_option("GALLIUM_TRACE", NULL); if(!filename) return FALSE; diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index 74c5e83e9e..62b4fe429b 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -43,11 +43,6 @@ struct pipe_transfer; struct pipe_box; /* - * Call before use. - */ -void trace_dump_init(void); - -/* * Low level dumping controls. * * Opening the trace file and checking if that is opened. diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 155c869fbd..18805655bd 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -517,7 +517,6 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) trace_dump_struct_begin("pipe_vertex_buffer"); trace_dump_member(uint, state, stride); - trace_dump_member(uint, state, max_index); trace_dump_member(uint, state, buffer_offset); trace_dump_member(resource_ptr, state, buffer); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index c2de2daa88..42180c4f19 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -158,8 +158,7 @@ trace_screen_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; @@ -172,10 +171,9 @@ trace_screen_is_format_supported(struct pipe_screen *_screen, 
trace_dump_arg(int, target); trace_dump_arg(uint, sample_count); trace_dump_arg(uint, tex_usage); - trace_dump_arg(uint, geom_flags); result = screen->is_format_supported(screen, format, target, sample_count, - tex_usage, geom_flags); + tex_usage); trace_dump_ret(bool, result); @@ -393,10 +391,9 @@ trace_screen_fence_reference(struct pipe_screen *_screen, } -static int +static boolean trace_screen_fence_signalled(struct pipe_screen *_screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; @@ -406,11 +403,10 @@ trace_screen_fence_signalled(struct pipe_screen *_screen, trace_dump_arg(ptr, screen); trace_dump_arg(ptr, fence); - trace_dump_arg(uint, flags); - result = screen->fence_signalled(screen, fence, flags); + result = screen->fence_signalled(screen, fence); - trace_dump_ret(int, result); + trace_dump_ret(bool, result); trace_dump_call_end(); @@ -418,10 +414,10 @@ trace_screen_fence_signalled(struct pipe_screen *_screen, } -static int +static boolean trace_screen_fence_finish(struct pipe_screen *_screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; @@ -431,11 +427,11 @@ trace_screen_fence_finish(struct pipe_screen *_screen, trace_dump_arg(ptr, screen); trace_dump_arg(ptr, fence); - trace_dump_arg(uint, flags); + trace_dump_arg(uint, timeout); - result = screen->fence_finish(screen, fence, flags); + result = screen->fence_finish(screen, fence, timeout); - trace_dump_ret(int, result); + trace_dump_ret(bool, result); trace_dump_call_end(); @@ -472,8 +468,6 @@ trace_enabled(void) return trace; firstrun = FALSE; - trace_dump_init(); - if(trace_dump_trace_begin()) { trace_dumping_start(); trace = TRUE; |