From 1278507e3bf2e83c7027820a0d313de267a440ff Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Thu, 24 Dec 2009 00:55:49 +0100 Subject: gallium: adapt drivers to pipe_constant_buffer removal --- src/gallium/drivers/cell/ppu/cell_context.h | 2 +- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 12 ++++++------ src/gallium/drivers/cell/ppu/cell_state_emit.c | 6 +++--- src/gallium/drivers/cell/ppu/cell_state_shader.c | 4 ++-- src/gallium/drivers/failover/fo_context.h | 2 +- src/gallium/drivers/failover/fo_state.c | 2 +- src/gallium/drivers/i915/i915_context.h | 3 ++- src/gallium/drivers/i915/i915_state.c | 12 ++++++------ src/gallium/drivers/i965/brw_pipe_shader.c | 6 +++--- src/gallium/drivers/identity/id_context.c | 12 ++++++------ src/gallium/drivers/llvmpipe/lp_context.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_context.h | 2 +- src/gallium/drivers/llvmpipe/lp_state.h | 2 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 9 ++++----- src/gallium/drivers/nv04/nv04_state.c | 12 ++++++------ src/gallium/drivers/nv10/nv10_state.c | 12 ++++++------ src/gallium/drivers/nv20/nv20_state.c | 12 ++++++------ src/gallium/drivers/nv30/nv30_state.c | 6 +++--- src/gallium/drivers/nv40/nv40_state.c | 6 +++--- src/gallium/drivers/nv50/nv50_state.c | 6 +++--- src/gallium/drivers/r300/r300_state.c | 14 +++++++------- src/gallium/drivers/softpipe/sp_context.c | 4 ++-- src/gallium/drivers/softpipe/sp_context.h | 2 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 12 ++++++------ src/gallium/drivers/softpipe/sp_state.h | 2 +- src/gallium/drivers/softpipe/sp_state_fs.c | 5 ++--- src/gallium/drivers/svga/svga_pipe_constants.c | 4 ++-- src/gallium/drivers/trace/tr_context.c | 11 ++++++----- src/gallium/drivers/trace/tr_dump_state.c | 4 ++-- src/gallium/drivers/trace/tr_dump_state.h | 2 +- 30 files changed, 96 insertions(+), 96 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 5c3188e7f9..1498fb665a 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -115,7 +115,7 @@ struct cell_context struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[2]; + struct pipe_buffer *constants[2]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 5cc1d4ddf8..2016b21a40 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -51,17 +51,17 @@ cell_map_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) { - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + if (sp->constants[i] && sp->constants[i]->size) { + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i], PIPE_BUFFER_USAGE_CPU_READ); cell_flush_buffer_range(sp, sp->mapped_constants[i], - sp->constants[i].buffer->size); + sp->constants[i]->size); } } draw_set_mapped_constant_buffer(sp->draw, sp->mapped_constants[PIPE_SHADER_VERTEX], - sp->constants[PIPE_SHADER_VERTEX].buffer->size); + sp->constants[PIPE_SHADER_VERTEX]->size); } static void @@ -70,8 +70,8 @@ cell_unmap_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - ws->buffer_unmap(ws, sp->constants[i].buffer); + if (sp->constants[i] && sp->constants[i]->size) + ws->buffer_unmap(ws, sp->constants[i]); sp->mapped_constants[i] = NULL; } } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index ac5fafec1a..ab55a24bb6 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -240,12 +240,12 @@ cell_emit_state(struct cell_context *cell) if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { const uint shader = PIPE_SHADER_FRAGMENT; - const uint num_const = cell->constants[shader].buffer->size / sizeof(float); + const uint num_const = cell->constants[shader]->size / sizeof(float); uint i, j; float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); uint32_t *ibuf = (uint32_t *) buf; const float *constants = pipe_buffer_map(cell->pipe.screen, - cell->constants[shader].buffer, + cell->constants[shader], PIPE_BUFFER_USAGE_CPU_READ); ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; ibuf[4] = num_const; @@ -253,7 +253,7 @@ cell_emit_state(struct cell_context *cell) for (i = 0; i < num_const; i++) { buf[j++] = constants[i]; } - pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer); + pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader]); } if (cell->dirty & (CELL_NEW_FRAMEBUFFER | diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 6568c784fe..cf5d681499 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -183,7 +183,7 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const struct pipe_buffer *buf) { struct cell_context *cell = cell_context(pipe); @@ -193,7 +193,7 @@ cell_set_constant_buffer(struct pipe_context *pipe, draw_flush(cell->draw); /* note: reference counting */ - pipe_buffer_reference(&cell->constants[shader].buffer, buf->buffer); + pipe_buffer_reference(&cell->constants[shader], buf); if (shader == PIPE_SHADER_VERTEX) cell->dirty |= CELL_NEW_VS_CONSTANTS; diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 149393712a..c8be885457 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -125,7 +125,7 @@ failover_context( struct pipe_context *pipe ) void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf); + const struct pipe_buffer *buf); #endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 3f5f556032..55ccab6e98 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -495,7 +495,7 @@ failover_set_vertex_elements(struct pipe_context *pipe, void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const struct pipe_buffer *buf) { struct failover_context *failover = failover_context(pipe); diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 234b441ce6..37cbd56036 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -233,7 +233,8 @@ struct i915_context struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + /* XXX unneded */ + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index e580b6c0f7..4d48b5a0be 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -517,7 +517,7 @@ static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) static void i915_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const struct pipe_buffer *buf) { struct i915_context *i915 = i915_context(pipe); struct pipe_screen *screen = pipe->screen; @@ -537,13 +537,13 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, */ if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(screen, buf->buffer, + if (buf->size && + (mapped = pipe_buffer_map(screen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(i915->current.constants[shader], mapped, buf->buffer->size); - pipe_buffer_unmap(screen, buf->buffer); + memcpy(i915->current.constants[shader], mapped, buf->size); + pipe_buffer_unmap(screen, buf); i915->current.num_user_constants[shader] - = buf->buffer->size / (4 * sizeof(float)); + = buf->size / (4 * sizeof(float)); } else { i915->current.num_user_constants[shader] = 0; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 20f20571f6..ede6b347a5 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -255,7 +255,7 @@ static void brw_delete_vs_state( struct pipe_context *pipe, void *prog ) static void brw_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const struct pipe_buffer *buf) { struct brw_context *brw = brw_context(pipe); @@ -263,13 +263,13 @@ static void brw_set_constant_buffer(struct pipe_context *pipe, if (shader == PIPE_SHADER_FRAGMENT) { pipe_buffer_reference( &brw->curr.fragment_constants, - buf->buffer ); + buf ); brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS; } else { pipe_buffer_reference( &brw->curr.vertex_constants, - buf->buffer ); + buf ); brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index bdbaae5987..fb4d91a114 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -404,17 +404,17 @@ static void identity_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, - const struct pipe_constant_buffer *_buffer) + const struct pipe_buffer *_buffer) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - struct pipe_constant_buffer unwrapped_buffer; - struct pipe_constant_buffer *buffer = NULL; + struct pipe_buffer *unwrapped_buffer; + struct pipe_buffer *buffer = NULL; - /* unwrap the input state */ + /* XXX hmm? unwrap the input state */ if (_buffer) { - unwrapped_buffer.buffer = identity_buffer_unwrap(_buffer->buffer); - buffer = &unwrapped_buffer; + unwrapped_buffer = identity_buffer_unwrap(_buffer); + buffer = unwrapped_buffer; } pipe->set_constant_buffer(pipe, diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 001311e703..f381156d00 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -124,8 +124,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) } for (i = 0; i < Elements(llvmpipe->constants); i++) { - if (llvmpipe->constants[i].buffer) { - pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL); + if (llvmpipe->constants[i]) { + pipe_buffer_reference(&llvmpipe->constants[i], NULL); } } diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index cc4d5ad5fd..ca50585896 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -64,7 +64,7 @@ struct llvmpipe_context { /** Other rendering state */ struct pipe_blend_color blend_color[4][16]; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 5cee7bf74b..131e3621e9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -155,7 +155,7 @@ void llvmpipe_set_clip_state( struct pipe_context *, void llvmpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - const struct pipe_constant_buffer *buf); + const struct pipe_buffer *buf); void *llvmpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 22683ff8b4..867e8ceea7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -713,12 +713,11 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *constants) + const struct pipe_buffer *constants) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - struct pipe_buffer *buffer = constants ? constants->buffer : NULL; - unsigned size = buffer ? buffer->size : 0; - const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; + unsigned size = constants ? constants->size : 0; + const void *data = constants ? llvmpipe_buffer(constants)->data : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); @@ -727,7 +726,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, draw_flush(llvmpipe->draw); /* note: reference counting */ - pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); + pipe_buffer_reference(&llvmpipe->constants[shader], constants); if(shader == PIPE_SHADER_FRAGMENT) { llvmpipe->jit_context.constants = data; diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index ef3005db5f..ac69b29616 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -332,7 +332,7 @@ nv04_set_clip_state(struct pipe_context *pipe, static void nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + const struct pipe_buffer *buf ) { struct nv04_context *nv04 = nv04_context(pipe); struct pipe_screen *pscreen = pipe->screen; @@ -342,13 +342,13 @@ nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + if (buf && buf->size && + (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(nv04->constbuf[shader], mapped, buf->buffer->size); + memcpy(nv04->constbuf[shader], mapped, buf->size); nv04->constbuf_nr[shader] = - buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pscreen, buf->buffer); + buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pscreen, buf); } } } diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index ffc6be3c40..150ab5029c 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -458,7 +458,7 @@ nv10_set_clip_state(struct pipe_context *pipe, static void nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + const struct pipe_buffer *buf ) { struct nv10_context *nv10 = nv10_context(pipe); struct pipe_screen *pscreen = pipe->screen; @@ -468,13 +468,13 @@ nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + if (buf->size && + (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(nv10->constbuf[shader], mapped, buf->buffer->size); + memcpy(nv10->constbuf[shader], mapped, buf->size); nv10->constbuf_nr[shader] = - buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pscreen, buf->buffer); + buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pscreen, buf); } } } diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c index 3a82e63423..d7893b4f0f 100644 --- a/src/gallium/drivers/nv20/nv20_state.c +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -451,7 +451,7 @@ nv20_set_clip_state(struct pipe_context *pipe, static void nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + const struct pipe_buffer *buf ) { struct nv20_context *nv20 = nv20_context(pipe); struct pipe_screen *pscreen = pipe->screen; @@ -461,13 +461,13 @@ nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + if (buf->size && + (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(nv20->constbuf[shader], mapped, buf->buffer->size); + memcpy(nv20->constbuf[shader], mapped, buf->size); nv20->constbuf_nr[shader] = - buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pscreen, buf->buffer); + buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pscreen, buf); } } } diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index e6321b480f..dfac46e23f 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -590,12 +590,12 @@ nv30_set_clip_state(struct pipe_context *pipe, static void nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + const struct pipe_buffer *buf ) { struct nv30_context *nv30 = nv30_context(pipe); - nv30->constbuf[shader] = buf->buffer; - nv30->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + nv30->constbuf[shader] = buf; + nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); if (shader == PIPE_SHADER_VERTEX) { nv30->dirty |= NV30_NEW_VERTPROG; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index ed55d29aff..81ccbb5ea5 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -605,12 +605,12 @@ nv40_set_clip_state(struct pipe_context *pipe, static void nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + const struct pipe_buffer *buf ) { struct nv40_context *nv40 = nv40_context(pipe); - nv40->constbuf[shader] = buf->buffer; - nv40->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + nv40->constbuf[shader] = buf; + nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); if (shader == PIPE_SHADER_VERTEX) { nv40->dirty |= NV40_NEW_VERTPROG; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 88aef52d08..f462558aec 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -588,16 +588,16 @@ nv50_set_clip_state(struct pipe_context *pipe, static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + const struct pipe_buffer *buf ) { struct nv50_context *nv50 = nv50_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; + nv50->constbuf[PIPE_SHADER_VERTEX] = buf; nv50->dirty |= NV50_NEW_VERTPROG_CB; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; + nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf; nv50->dirty |= NV50_NEW_FRAGPROG_CB; } } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 49072462ec..b627895d4c 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -804,22 +804,22 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) static void r300_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const struct pipe_buffer *buf) { struct r300_context* r300 = r300_context(pipe); void *mapped; - if (buf == NULL || buf->buffer->size == 0 || - (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) + if (buf == NULL || buf->size == 0 || + (mapped = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) { r300->shader_constants[shader].count = 0; return; } - assert((buf->buffer->size % 4 * sizeof(float)) == 0); - memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size); - r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pipe->screen, buf->buffer); + assert((buf->size % 4 * sizeof(float)) == 0); + memcpy(r300->shader_constants[shader].constants, mapped, buf->size); + r300->shader_constants[shader].count = buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pipe->screen, buf); if (shader == PIPE_SHADER_VERTEX) r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 2a33587b5a..0862e9f24b 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -113,8 +113,8 @@ softpipe_destroy( struct pipe_context *pipe ) } for (i = 0; i < Elements(softpipe->constants); i++) { - if (softpipe->constants[i].buffer) { - pipe_buffer_reference(&softpipe->constants[i].buffer, NULL); + if (softpipe->constants[i]) { + pipe_buffer_reference(&softpipe->constants[i], NULL); } } diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 8ce20c5744..ac24fccb4c 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -62,7 +62,7 @@ struct softpipe_context { /** Other rendering state */ struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 518ef8806e..96e1c5d815 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -51,13 +51,13 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) uint i, size; for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + if (sp->constants[i] && sp->constants[i]->size) + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i], PIPE_BUFFER_USAGE_CPU_READ); } - if (sp->constants[PIPE_SHADER_VERTEX].buffer) - size = sp->constants[PIPE_SHADER_VERTEX].buffer->size; + if (sp->constants[PIPE_SHADER_VERTEX]) + size = sp->constants[PIPE_SHADER_VERTEX]->size; else size = 0; @@ -81,8 +81,8 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) draw_set_mapped_constant_buffer(sp->draw, NULL, 0); for (i = 0; i < 2; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - ws->buffer_unmap(ws, sp->constants[i].buffer); + if (sp->constants[i] && sp->constants[i]->size) + ws->buffer_unmap(ws, sp->constants[i]); sp->mapped_constants[i] = NULL; } } diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 26d5c3fbb2..1169520b44 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -133,7 +133,7 @@ void softpipe_set_clip_state( struct pipe_context *, void softpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - const struct pipe_constant_buffer *buf); + const struct pipe_buffer *buf); void *softpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index b41f7e8ab7..fa85946c6a 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -152,7 +152,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) void softpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const struct pipe_buffer *buf) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -160,8 +160,7 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); /* note: reference counting */ - pipe_buffer_reference(&softpipe->constants[shader].buffer, - buf ? buf->buffer : NULL); + pipe_buffer_reference(&softpipe->constants[shader], buf); softpipe->dirty |= SP_NEW_CONSTANTS; } diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c index 10e7a12189..ea0f833cea 100644 --- a/src/gallium/drivers/svga/svga_pipe_constants.c +++ b/src/gallium/drivers/svga/svga_pipe_constants.c @@ -49,7 +49,7 @@ struct svga_constbuf static void svga_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const struct pipe_buffer *buf) { struct svga_context *svga = svga_context(pipe); @@ -57,7 +57,7 @@ static void svga_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); pipe_buffer_reference( &svga->curr.cb[shader], - buf->buffer ); + buf ); if (shader == PIPE_SHADER_FRAGMENT) svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 80f4874b78..11044d6845 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -825,13 +825,13 @@ trace_context_set_clip_state(struct pipe_context *_pipe, static INLINE void trace_context_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, - const struct pipe_constant_buffer *buffer) + const struct pipe_buffer *buffer) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; if (buffer) - trace_screen_user_buffer_update(_pipe->screen, buffer->buffer); + trace_screen_user_buffer_update(_pipe->screen, buffer); trace_dump_call_begin("pipe_context", "set_constant_buffer"); @@ -840,10 +840,11 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe, trace_dump_arg(uint, index); trace_dump_arg(constant_buffer, buffer); + /* XXX hmm? */ if (buffer) { - struct pipe_constant_buffer _buffer; - _buffer.buffer = trace_buffer_unwrap(tr_ctx, buffer->buffer); - pipe->set_constant_buffer(pipe, shader, index, &_buffer); + struct pipe_buffer *_buffer; + _buffer = trace_buffer_unwrap(tr_ctx, buffer); + pipe->set_constant_buffer(pipe, shader, index, _buffer); } else { pipe->set_constant_buffer(pipe, shader, index, buffer); } diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 0102cc1876..5794c90298 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -229,7 +229,7 @@ void trace_dump_clip_state(const struct pipe_clip_state *state) } -void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) +void trace_dump_constant_buffer(const struct pipe_buffer *state) { if (!trace_dumping_enabled_locked()) return; @@ -241,7 +241,7 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) trace_dump_struct_begin("pipe_constant_buffer"); - trace_dump_member(buffer_ptr, state, buffer); + trace_dump_reference(&state->reference); trace_dump_struct_end(); } diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 07ad6fbb20..c7860fd6e1 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -47,7 +47,7 @@ void trace_dump_scissor_state(const struct pipe_scissor_state *state); void trace_dump_clip_state(const struct pipe_clip_state *state); -void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); +void trace_dump_constant_buffer(const struct pipe_buffer *state); void trace_dump_token(const struct tgsi_token *token); -- cgit v1.2.3 From 7ca0ce38340144794267609646048b3820d594ab Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 29 Dec 2009 23:21:01 +0100 Subject: Implement draw_arrays_instanced() in softpipe. Modify the translate module to respect instance divisors and accept instance id as a parameter to calculate input vertex offset. --- src/gallium/auxiliary/draw/draw_context.h | 8 ++++ src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 +- src/gallium/auxiliary/draw/draw_private.h | 2 + src/gallium/auxiliary/draw/draw_pt.c | 23 ++++++++++ src/gallium/auxiliary/draw/draw_pt_emit.c | 3 ++ src/gallium/auxiliary/draw/draw_pt_fetch.c | 3 ++ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 ++ src/gallium/auxiliary/draw/draw_vs.h | 1 + src/gallium/auxiliary/draw/draw_vs_varient.c | 6 +++ src/gallium/auxiliary/tgsi/tgsi_dump.c | 3 +- src/gallium/auxiliary/translate/translate.h | 2 + .../auxiliary/translate/translate_generic.c | 16 +++++-- src/gallium/auxiliary/translate/translate_sse.c | 1 + src/gallium/drivers/softpipe/sp_context.c | 1 + src/gallium/drivers/softpipe/sp_draw_arrays.c | 51 ++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_state.h | 8 ++++ src/gallium/drivers/svga/svga_state_vs.c | 1 + 17 files changed, 130 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 465b8f10c6..c0f6a61411 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -151,6 +151,14 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count); +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + void draw_flush(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 1a5269c0de..bb8a8ff491 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf, /* Note: we really do want data[0] here, not data[pos]: */ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); - vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr); if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); @@ -275,6 +275,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 3850cede1e..129d919a84 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -226,6 +226,8 @@ struct draw_context unsigned reduced_prim; + unsigned instance_id; + void *driver_private; }; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 2801dbafe4..1217b9e5d7 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -312,5 +312,28 @@ draw_arrays(struct draw_context *draw, unsigned prim, #endif /* drawing done here: */ + draw->instance_id = 0; draw_pt_arrays(draw, prim, start, count); } + +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + unsigned reduced_prim = u_reduced_prim(mode); + unsigned instance; + + if (reduced_prim != draw->reduced_prim) { + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); + draw->reduced_prim = reduced_prim; + } + + for (instance = 0; instance < instanceCount; instance++) { + draw->instance_id = instance + startInstance; + draw_pt_arrays(draw, mode, start, count); + } +} diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 064e16c295..d0abeb9336 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -125,6 +125,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; @@ -204,6 +205,7 @@ void draw_pt_emit( struct pt_emit *emit, translate->run( translate, 0, vertex_count, + draw->instance_id, hw_verts ); render->unmap_vertices( render, @@ -263,6 +265,7 @@ void draw_pt_emit_linear(struct pt_emit *emit, translate->run(translate, 0, count, + draw->instance_id, hw_verts); if (0) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 305bfef435..e8174a2971 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -81,6 +81,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; key.element[nr].input_offset = 0; + key.element[nr].instance_divisor = 0; key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].output_offset = dst_offset; dst_offset += 1 * sizeof(float); @@ -100,6 +101,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, key.element[nr].input_format = draw->pt.vertex_element[i].src_format; key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; + key.element[nr].instance_divisor = draw->pt.vertex_element[i].instance_divisor; key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[nr].output_offset = dst_offset; @@ -183,6 +185,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, translate->run( translate, start, count, + draw->instance_id, verts ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e7fe6b3b76..40bfc0fbb2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -169,6 +169,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, key.element[i].input_format = input_format; key.element[i].input_buffer = input_buffer; key.element[i].input_offset = input_offset; + key.element[i].instance_divisor = src->instance_divisor; key.element[i].output_format = output_format; key.element[i].output_offset = dst_offset; @@ -314,6 +315,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); if (0) { @@ -374,6 +376,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index e3b807ebd0..00036cfe68 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -43,6 +43,7 @@ struct draw_varient_input enum pipe_format format; unsigned buffer; unsigned offset; + unsigned instance_divisor; }; struct draw_varient_output diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 7ee567d478..4cc080f803 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -180,6 +180,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -202,6 +203,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->fetch->run( vsvg->fetch, start, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, @@ -238,6 +240,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -283,6 +286,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; + fetch.element[i].instance_divisor = 0; fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; fetch.element[i].output_offset = i * 4 * sizeof(float); assert(fetch.element[i].output_offset < fetch.output_stride); @@ -297,6 +301,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit.element[i].input_buffer = 0; emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); + emit.element[i].instance_divisor = 0; emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); emit.element[i].output_offset = key->element[i].out.offset; assert(emit.element[i].input_offset <= fetch.output_stride); @@ -305,6 +310,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].input_buffer = 1; emit.element[i].input_offset = 0; + emit.element[i].instance_divisor = 0; emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].output_offset = key->element[i].out.offset; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 5e7e5d2ff9..4391ca75d1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -122,7 +122,8 @@ static const char *semantic_names[] = "GENERIC", "NORMAL", "FACE", - "EDGEFLAG" + "EDGEFLAG", + "INSTANCEID" }; static const char *immediate_type_names[] = diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 1afdf194b3..fb298471b8 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -50,6 +50,7 @@ struct translate_element enum pipe_format output_format; unsigned input_buffer:8; unsigned input_offset:24; + unsigned instance_divisor; unsigned output_offset; }; @@ -79,6 +80,7 @@ struct translate { void (PIPE_CDECL *run)( struct translate *, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer); }; diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 266e7ee81e..0fa9927409 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -49,6 +49,7 @@ struct translate_generic { fetch_func fetch; unsigned buffer; unsigned input_offset; + unsigned instance_divisor; emit_func emit; unsigned output_offset; @@ -607,6 +608,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, static void PIPE_CDECL generic_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -622,13 +624,20 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - - const char *src = (tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt); + const char *src; char *dst = (vert + tg->attrib[attr].output_offset); + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + tg->attrib[attr].fetch( src, data ); if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", @@ -687,6 +696,7 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); tg->attrib[i].buffer = key->element[i].input_buffer; tg->attrib[i].input_offset = key->element[i].input_offset; + tg->attrib[i].instance_divisor = key->element[i].instance_divisor; tg->attrib[i].emit = get_emit_func(key->element[i].output_format); tg->attrib[i].output_offset = key->element[i].output_offset; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index b62db8d8f3..edd0be17f0 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -637,6 +637,7 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, static void PIPE_CDECL translate_sse_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 2a33587b5a..406414ae3d 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -238,6 +238,7 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; + softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 518ef8806e..6a593fb06a 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -184,3 +184,54 @@ softpipe_draw_elements(struct pipe_context *pipe, 0, 0xffffffff, mode, start, count ); } + +boolean +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + sp->reduced_api_prim = u_reduced_prim(mode); + + if (sp->dirty) { + softpipe_update_derived(sp); + } + + softpipe_map_transfers(sp); + softpipe_map_constant_buffers(sp); + + /* Map vertex buffers */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + void *buf; + + buf = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + + draw_set_mapped_element_buffer_range(draw, 0, start, + start + count - 1, NULL); + + /* draw! */ + draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); + + /* unmap vertex/index buffers - will cause draw module to flush */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); + } + + /* Note: leave drawing surfaces mapped */ + softpipe_unmap_constant_buffers(sp); + + sp->dirty_render_cache = TRUE; + + return TRUE; +} diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 26d5c3fbb2..13935fd799 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -189,6 +189,14 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count); +boolean +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + void softpipe_map_transfers(struct softpipe_context *sp); diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 44b7ceb4fa..114de1a49e 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -198,6 +198,7 @@ static int update_zero_stride( struct svga_context *svga, key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[0].input_buffer = vel->vertex_buffer_index; key.element[0].input_offset = vel->src_offset; + key.element[0].instance_divisor = vel->instance_divisor; key.element[0].output_offset = const_idx * 4 * sizeof(float); translate_key_sanitize(&key); -- cgit v1.2.3 From bccdb239c700e0c7c90d27a281d5246b958581b5 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 18:27:58 +0100 Subject: sp: Implement draw_elements_instanced(). --- src/gallium/drivers/softpipe/sp_context.c | 1 + src/gallium/drivers/softpipe/sp_draw_arrays.c | 46 +++++++++++++++++++++++++-- src/gallium/drivers/softpipe/sp_state.h | 10 ++++++ 3 files changed, 55 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 406414ae3d..969d69d6b4 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -239,6 +239,7 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; + softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 6a593fb06a..debf5bfe06 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -192,6 +192,26 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned count, unsigned startInstance, unsigned instanceCount) +{ + return softpipe_draw_elements_instanced(pipe, + NULL, + 0, + mode, + start, + count, + startInstance, + instanceCount); +} + +boolean +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) { struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; @@ -216,8 +236,26 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, buf); } - draw_set_mapped_element_buffer_range(draw, 0, start, - start + count - 1, NULL); + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes; + + mapped_indexes = pipe_buffer_map(pipe->screen, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, + indexSize, + 0, + 0xffffffff, + mapped_indexes); + } else { + /* no index/element buffer */ + draw_set_mapped_element_buffer_range(draw, + 0, + start, + start + count - 1, + NULL); + } /* draw! */ draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); @@ -227,6 +265,10 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, NULL); pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe_buffer_unmap(pipe->screen, indexBuffer); + } /* Note: leave drawing surfaces mapped */ softpipe_unmap_constant_buffers(sp); diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 13935fd799..00da41b985 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -197,6 +197,16 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount); +boolean +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + void softpipe_map_transfers(struct softpipe_context *sp); -- cgit v1.2.3 From 230355648b647b32161124cad23be553a2c6d196 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 18:41:05 +0100 Subject: sp: Generalise drawing code to remove dupes. Also, avoid nested draw calls from simpler versions. --- src/gallium/drivers/softpipe/sp_draw_arrays.c | 165 +++++++++++++------------- 1 file changed, 84 insertions(+), 81 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index debf5bfe06..14cb1322e1 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -88,19 +88,41 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) } +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + */ +static boolean +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); + return softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } -/** - * Draw vertex arrays, with optional indexing. - * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - */ boolean softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, @@ -109,67 +131,16 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { - struct softpipe_context *sp = softpipe_context(pipe); - struct draw_context *draw = sp->draw; - unsigned i; - - sp->reduced_api_prim = u_reduced_prim(mode); - - if (sp->dirty) - softpipe_update_derived( sp ); - - softpipe_map_transfers(sp); - softpipe_map_constant_buffers(sp); - - /* - * Map vertex buffers - */ - for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf - = pipe_buffer_map(pipe->screen, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(draw, i, buf); - } - - /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes - = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(draw, indexSize, - min_index, - max_index, - mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, start, - start + count - 1, NULL); - } - - /* draw! */ - draw_arrays(draw, mode, start, count); - - /* - * unmap vertex/index buffers - will cause draw module to flush - */ - for (i = 0; i < sp->num_vertex_buffers; i++) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); - } - if (indexBuffer) { - draw_set_mapped_element_buffer(draw, 0, NULL); - pipe_buffer_unmap(pipe->screen, indexBuffer); - } - - - /* Note: leave drawing surfaces mapped */ - softpipe_unmap_constant_buffers(sp); - - sp->dirty_render_cache = TRUE; - - return TRUE; + return softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + min_index, + max_index, + mode, + start, + count, + 0, + 1); } @@ -179,10 +150,16 @@ softpipe_draw_elements(struct pipe_context *pipe, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + return softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } boolean @@ -193,14 +170,16 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount) { - return softpipe_draw_elements_instanced(pipe, - NULL, - 0, - mode, - start, - count, - startInstance, - instanceCount); + return softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); } boolean @@ -212,6 +191,30 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, unsigned count, unsigned startInstance, unsigned instanceCount) +{ + return softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +static boolean +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) { struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; @@ -245,8 +248,8 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(draw, indexSize, - 0, - 0xffffffff, + minIndex, + maxIndex, mapped_indexes); } else { /* no index/element buffer */ -- cgit v1.2.3 From 543b9566bdaa48fea2df1866fa1310c1cdbcde27 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 22:18:53 +0100 Subject: Add lame support for instanceID to draw module. It's all screaming for integer support -- fake it with float for now. --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 +- src/gallium/auxiliary/draw/draw_pt.h | 3 +- src/gallium/auxiliary/draw/draw_pt_emit.c | 3 +- src/gallium/auxiliary/draw/draw_pt_fetch.c | 32 +++++++--- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 1 + .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 14 ++++- src/gallium/auxiliary/draw/draw_vs_varient.c | 3 + src/gallium/auxiliary/translate/translate.h | 6 ++ .../auxiliary/translate/translate_generic.c | 26 +++++--- src/gallium/auxiliary/translate/translate_sse.c | 70 +++++++++++++++------- src/gallium/drivers/svga/svga_state_vs.c | 1 + 11 files changed, 118 insertions(+), 44 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index bb8a8ff491..d40c035240 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -271,7 +271,8 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 20edf7a227..d5e0d92a60 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -183,7 +183,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); struct pt_fetch; void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vertex_input_count, - unsigned vertex_size ); + unsigned vertex_size, + unsigned instance_id_index ); void draw_pt_fetch_run( struct pt_fetch *fetch, const unsigned *elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d0abeb9336..4fb53276bb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -121,7 +121,8 @@ void draw_pt_emit_prepare( struct pt_emit *emit, emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index f88a839f61..36c27e22ff 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -58,12 +58,14 @@ struct pt_fetch { */ void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vs_input_count, - unsigned vertex_size ) + unsigned vertex_size, + unsigned instance_id_index ) { struct draw_context *draw = fetch->draw; unsigned nr_inputs; - unsigned i, nr = 0; + unsigned i, nr = 0, ei = 0; unsigned dst_offset = 0; + unsigned num_extra_inputs = 0; struct translate_key key; fetch->vertex_size = vertex_size; @@ -78,6 +80,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, { /* Need to set header->vertex_id = 0xffff somehow. */ + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; key.element[nr].input_offset = 0; @@ -92,16 +95,27 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, */ dst_offset += 4 * sizeof(float); } - - assert( draw->pt.nr_vertex_elements >= vs_input_count ); - nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements ); + if (instance_id_index != ~0) { + num_extra_inputs++; + } + + assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count); + + nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs); for (i = 0; i < nr_inputs; i++) { - key.element[nr].input_format = draw->pt.vertex_element[i].src_format; - key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; - key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; - key.element[nr].instance_divisor = draw->pt.vertex_element[i].instance_divisor; + if (i == instance_id_index) { + key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID; + key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; /* XXX: Make it UINT. */ + } else { + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; + key.element[nr].input_format = draw->pt.vertex_element[ei].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index; + key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset; + key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor; + ei++; + } key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[nr].output_offset = dst_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 771d94b973..2a604470e9 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -166,6 +166,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, continue; } + key.element[i].type = TRANSLATE_ELEMENT_NORMAL; key.element[i].input_format = input_format; key.element[i].input_buffer = input_buffer; key.element[i].input_offset = input_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 932113783d..0238f2e234 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -58,6 +58,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *vs = draw->vs.vertex_shader; + unsigned i; + boolean instance_id_index = ~0; /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. @@ -65,6 +67,15 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned nr = MAX2( vs->info.num_inputs, vs->info.num_outputs + 1 ); + /* Scan for instanceID system value. + */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + fpme->prim = prim; fpme->opt = opt; @@ -78,7 +89,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, draw_pt_fetch_prepare( fpme->fetch, vs->info.num_inputs, - fpme->vertex_size ); + fpme->vertex_size, + instance_id_index ); /* XXX: it's not really gl rasterization rules we care about here, * but gl vs dx9 clip spaces. */ diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 8e14bdd6bd..60b7a3ea36 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -284,6 +284,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, fetch.nr_elements = key->nr_inputs; fetch.output_stride = vsvg->temp_vertex_stride; for (i = 0; i < key->nr_inputs; i++) { + fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL; fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; @@ -299,6 +300,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, for (i = 0; i < key->nr_outputs; i++) { if (key->element[i].out.format != EMIT_1F_PSIZE) { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit.element[i].input_buffer = 0; emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); @@ -308,6 +310,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, assert(emit.element[i].input_offset <= fetch.output_stride); } else { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].input_buffer = 1; emit.element[i].input_offset = 0; diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 9ae7a482a0..54ed2c1a4b 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -44,8 +44,14 @@ #include "pipe/p_format.h" #include "pipe/p_state.h" +enum translate_element_type { + TRANSLATE_ELEMENT_NORMAL, + TRANSLATE_ELEMENT_INSTANCE_ID +}; + struct translate_element { + enum translate_element_type type; enum pipe_format input_format; enum pipe_format output_format; unsigned input_buffer:8; diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 742f03b503..24727d4988 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -46,6 +46,8 @@ struct translate_generic { struct translate translate; struct { + enum translate_element_type type; + fetch_func fetch; unsigned buffer; unsigned input_offset; @@ -632,22 +634,27 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const char *src; char *dst = (vert + tg->attrib[attr].output_offset); - if (tg->attrib[attr].instance_divisor) { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * - (instance_id / tg->attrib[attr].instance_divisor); + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const char *src; + + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + + tg->attrib[attr].fetch( src, data ); } else { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt; + data[0] = (float)instance_id; } - tg->attrib[attr].fetch( src, data ); - if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", i, attr, data[0], data[1], data[2], data[3]); @@ -700,6 +707,7 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->translate.run = generic_run; for (i = 0; i < key->nr_elements; i++) { + tg->attrib[i].type = key->element[i].type; tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); tg->attrib[i].buffer = key->element[i].input_buffer; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index ba4a246fdb..8e152a002a 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -50,13 +50,15 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate, unsigned start, unsigned count, unsigned instance_id, - void *output_buffer ); + void *output_buffer, + float instance_id_float ); typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, const unsigned *elts, unsigned count, unsigned instance_id, - void *output_buffer ); + void *output_buffer, + float instance_id_float ); struct translate_buffer { const void *base_ptr; @@ -70,6 +72,9 @@ struct translate_buffer_varient { }; +#define ELEMENT_BUFFER_INSTANCE_ID 1001 + + struct translate_sse { struct translate translate; @@ -97,6 +102,7 @@ struct translate_sse { boolean use_instancing; unsigned instance_id; + float instance_id_float; /* XXX: needed while no integer support in TGSI */ run_func gen_run; run_elts_func gen_run_elts; @@ -443,6 +449,10 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, unsigned var_idx, struct x86_reg elt ) { + if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { + return x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id_float)); + } if (linear && p->nr_buffer_varients == 1) { return p->idx_EBX; } @@ -577,6 +587,14 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_mov(p->func, x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), p->tmp_EAX); + + /* XXX: temporary */ + x86_mov(p->func, + p->tmp_EAX, + x86_fn_arg(p->func, 6)); + x86_mov(p->func, + x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id_float)), + p->tmp_EAX); } /* Get vertex count, compare to zero @@ -697,7 +715,8 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, elts, count, instance_id, - output_buffer ); + output_buffer, + (float)instance_id ); } static void PIPE_CDECL translate_sse_run( struct translate *translate, @@ -712,7 +731,8 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate, start, count, instance_id, - output_buffer ); + output_buffer, + (float)instance_id); } @@ -735,29 +755,35 @@ struct translate *translate_sse2_create( const struct translate_key *key ) p->translate.run = translate_sse_run; for (i = 0; i < key->nr_elements; i++) { - unsigned j; + if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) { + unsigned j; - p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 ); + p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1); - if (key->element[i].instance_divisor) { - p->use_instancing = TRUE; - } + if (key->element[i].instance_divisor) { + p->use_instancing = TRUE; + } - /* - * Map vertex element to vertex buffer varient. - */ - for (j = 0; j < p->nr_buffer_varients; j++) { - if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && - p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { - break; + /* + * Map vertex element to vertex buffer varient. + */ + for (j = 0; j < p->nr_buffer_varients; j++) { + if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && + p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { + break; + } } + if (j == p->nr_buffer_varients) { + p->buffer_varient[j].buffer_index = key->element[i].input_buffer; + p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; + p->nr_buffer_varients++; + } + p->element_to_buffer_varient[i] = j; + } else { + assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID); + + p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID; } - if (j == p->nr_buffer_varients) { - p->buffer_varient[j].buffer_index = key->element[i].input_buffer; - p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; - p->nr_buffer_varients++; - } - p->element_to_buffer_varient[i] = j; } if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 114de1a49e..82e7874e2a 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -194,6 +194,7 @@ static int update_zero_stride( struct svga_context *svga, key.output_stride = 4 * sizeof(float); key.nr_elements = 1; + key.element[0].type = TRANSLATE_ELEMENT_NORMAL; key.element[0].input_format = vel->src_format; key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[0].input_buffer = vel->vertex_buffer_index; -- cgit v1.2.3 From 2b4acd26b1680eda9622205bbc1879da30d085bc Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 6 Jan 2010 08:07:00 -0800 Subject: nv50: add missing parentheses in nv50_query_result() NOUVEAU_BO_RD is defined (1 << 2), and `|' has higher precedence than `?' so the second argument of nouveau_bo_map was always 0. Signed-off-by: Roel Kluin Signed-off-by: Brian Paul --- src/gallium/drivers/nv50/nv50_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 5305c93d59..f605c475ac 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -111,7 +111,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!q->ready) { ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD | - wait ? 0 : NOUVEAU_BO_NOWAIT); + (wait ? 0 : NOUVEAU_BO_NOWAIT)); if (ret) return false; q->result = ((uint32_t *)q->bo->map)[1]; -- cgit v1.2.3 From 468f270f3fc0efc136d03d0b6022916ddd8ea893 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 6 Jan 2010 17:51:08 -0800 Subject: svga: Silence uninitialized variable warning. --- src/gallium/drivers/svga/svga_state_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index 272d1dd14e..44bb58c900 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -67,7 +67,7 @@ static enum pipe_error compile_fs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_fragment_program( fs, key ); if (result == NULL) { -- cgit v1.2.3 From 4775723d2f641dcd82e8c9cd39ba52f8d86158c7 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 8 Jan 2010 21:43:54 -0800 Subject: r300g: Silence unused variable warnings. --- src/gallium/drivers/r300/r300_flush.c | 1 + src/gallium/drivers/r300/r300_state_derived.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 14a08241fc..4282357521 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -39,6 +39,7 @@ static void r300_flush(struct pipe_context* pipe, struct r300_query *query; CS_LOCALS(r300); + (void) cs_count; /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 7166694edf..55430a9ad6 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -464,7 +464,7 @@ static void r300_update_rs_block(struct r300_context* r300, /* Update the vertex format. */ static void r300_update_derived_shader_state(struct r300_context* r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); + /* struct r300_screen* r300screen = r300_screen(r300->context.screen); */ struct r300_vertex_info* vformat; struct r300_rs_block* rs_block; int i; -- cgit v1.2.3 From a5f771d7583f9cd2d47bc795fe9231d647659432 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 8 Jan 2010 17:15:52 +0100 Subject: nv50: try to honor sprite coord mode --- src/gallium/drivers/nv50/nv50_program.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 53f9f0adf3..bf50f073ad 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -3877,12 +3877,13 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } -static void +static uint32_t nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) { struct nv50_program *fp = nv50->fragprog; struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; + uint32_t origin = 0x00000010; /* XXX: this might not work correctly in all cases yet - we'll * just assume that an FP generic input that is not written in @@ -3916,7 +3917,9 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) if (mode == PIPE_SPRITE_COORD_NONE) { m += n; continue; - } + } else + if (mode == PIPE_SPRITE_COORD_LOWER_LEFT) + origin = 0; } /* this is either PointCoord or replaced by sprite coords */ @@ -3927,6 +3930,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) ++m; } } + return origin; } static int @@ -4025,7 +4029,7 @@ nv50_linkage_validate(struct nv50_context *nv50) } /* now fill the stateobj */ - so = so_new(6, 58, 0); + so = so_new(7, 57, 0); n = (m + 3) / 4; so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); @@ -4043,7 +4047,9 @@ nv50_linkage_validate(struct nv50_context *nv50) so_datap (so, lin, 4); if (nv50->rasterizer->pipe.point_sprite) { - nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff); + so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1); + so_data (so, + nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff)); so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); so_datap (so, pcrd, 8); -- cgit v1.2.3 From 222738fcf54c3164083e6330eb837c0ed0489f0f Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 8 Jan 2010 16:04:11 +0100 Subject: nv50: free src temp_temps on emit --- src/gallium/drivers/nv50/nv50_program.c | 74 +++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bf50f073ad..3c1e9eca87 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -138,6 +138,7 @@ struct nv50_pc { uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ struct nv50_reg *temp_temp[16]; + struct nv50_program_exec *temp_temp_exec[16]; unsigned temp_temp_nr; /* broadcast and destination replacement regs */ @@ -347,23 +348,29 @@ free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) } static struct nv50_reg * -temp_temp(struct nv50_pc *pc) +temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { if (pc->temp_temp_nr >= 16) assert(0); pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); + pc->temp_temp_exec[pc->temp_temp_nr] = e; return pc->temp_temp[pc->temp_temp_nr++]; } +/* This *must* be called for all nv50_program_exec that have been + * given as argument to temp_temp, or the temps will be leaked ! + */ static void -kill_temp_temp(struct nv50_pc *pc) +kill_temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { int i; for (i = 0; i < pc->temp_temp_nr; i++) - free_temp(pc, pc->temp_temp[i]); - pc->temp_temp_nr = 0; + if (pc->temp_temp_exec[i] == e) + free_temp(pc, pc->temp_temp[i]); + if (!e) + pc->temp_temp_nr = 0; } static int @@ -425,6 +432,8 @@ emit(struct nv50_pc *pc, struct nv50_program_exec *e) p->exec_head = e; p->exec_tail = e; p->exec_size += (e->inst[0] & 1) ? 2 : 1; + + kill_temp_temp(pc, e); } static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); @@ -780,7 +789,7 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_reg *temp; if (src->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } @@ -799,7 +808,7 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) e->inst[1] |= 0x00200000; } else if (src->type == P_CONST || src->type == P_IMMD) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -815,7 +824,7 @@ static void set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -823,7 +832,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_CONST || src->type == P_IMMD) { assert(!(e->inst[0] & 0x00800000)); if (e->inst[0] & 0x01000000) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -845,7 +854,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) set_long(pc, e); if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -853,7 +862,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_CONST || src->type == P_IMMD) { assert(!(e->inst[0] & 0x01000000)); if (e->inst[0] & 0x00800000) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -1321,7 +1330,7 @@ emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst, src1 = rsrc1; if (src0->mod & rsrc1->mod & NV50_MOD_NEG) { - src1 = alloc_temp(pc, NULL); + src1 = temp_temp(pc, e); emit_cvt(pc, src1, rsrc1, -1, CVT_S32_S32); } @@ -1350,9 +1359,6 @@ emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst, e->inst[0] |= 1 << 22; emit(pc, e); - - if (src1 != rsrc1) - free_temp(pc, src1); } static void @@ -1422,10 +1428,10 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, if (mask & (1 << 2)) { set_pred_wr(pc, 1, 0, pc->p->exec_tail); - tmp[1] = temp_temp(pc); + tmp[1] = temp_temp(pc, NULL); emit_minmax(pc, NV50_MAX_F32, tmp[1], src[1], zero); - tmp[3] = temp_temp(pc); + tmp[3] = temp_temp(pc, NULL); emit_minmax(pc, NV50_MAX_F32, tmp[3], src[3], neg128); emit_minmax(pc, NV50_MIN_F32, tmp[3], tmp[3], pos128); @@ -2153,7 +2159,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, } if ((r->mod & mod) != r->mod) { - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); emit_cvt(pc, temp, r, -1, cvn); r->mod = 0; r = temp; @@ -2341,7 +2347,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, brdc = temp = pc->r_brdc; if (brdc && brdc->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (sat) brdc = temp; } else @@ -2350,7 +2356,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; /* rdst[c] = dst[c]; */ /* done above */ - dst[c] = temp_temp(pc); + dst[c] = temp_temp(pc, NULL); } } @@ -2384,7 +2390,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_ARL: assert(src[0][0]); - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32); emit_arl(pc, dst[0], temp, 4); break; @@ -2441,7 +2447,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); emit_flop(pc, NV50_FLOP_COS, brdc, temp); @@ -2529,8 +2535,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, struct nv50_reg *t[2]; assert(!temp); - t[0] = temp_temp(pc); - t[1] = temp_temp(pc); + t[0] = temp_temp(pc, NULL); + t[1] = temp_temp(pc, NULL); if (mask & 0x6) emit_mov(pc, t[0], src[0][0]); @@ -2575,7 +2581,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_FRC: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2639,9 +2645,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, { struct nv50_reg *t[2]; - t[0] = temp_temp(pc); + t[0] = temp_temp(pc, NULL); if (mask & (1 << 1)) - t[1] = temp_temp(pc); + t[1] = temp_temp(pc, NULL); else t[1] = t[0]; @@ -2664,7 +2670,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_LRP: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2711,6 +2717,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); break; case TGSI_OPCODE_RET: @@ -2719,6 +2727,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_ret(pc, -1, 0); break; case TGSI_OPCODE_RSQ: + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; src[0][0]->mod |= NV50_MOD_ABS; emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]); break; @@ -2730,7 +2740,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_SCS: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & 3) emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) @@ -2759,7 +2769,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); emit_flop(pc, NV50_FLOP_SIN, brdc, temp); @@ -2848,7 +2858,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_XPD: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & (1 << 0)) { emit_mul(pc, temp, src[0][2], src[1][1]); emit_msb(pc, dst[0], src[0][1], src[1][2], temp); @@ -2902,7 +2912,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } } - kill_temp_temp(pc); + kill_temp_temp(pc, NULL); pc->reg_instance_nr = 0; return TRUE; @@ -3101,7 +3111,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (is_scalar_op(insn.Instruction.Opcode)) { pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); if (!pc->r_brdc) - pc->r_brdc = temp_temp(pc); + pc->r_brdc = temp_temp(pc, NULL); return nv50_program_tx_insn(pc, &insn); } pc->r_brdc = NULL; -- cgit v1.2.3 From 99637ba80e376e8dcdce8fb1597f55256c461aee Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 8 Jan 2010 17:07:40 +0100 Subject: nv50: handle TGSI_OPCODE_UMAD,UMUL,NOT and fix SAD --- src/gallium/drivers/nv50/nv50_program.c | 152 +++++++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 3c1e9eca87..e16fa479e5 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -246,7 +246,8 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); } static INLINE struct nv50_reg * @@ -286,7 +287,8 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); return NULL; } @@ -876,6 +878,26 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) e->inst[1] |= ((src->hw & 127) << 14); } +static void +set_half_src(struct nv50_pc *pc, struct nv50_reg *src, int lh, + struct nv50_program_exec *e, int pos) +{ + struct nv50_reg *r = src; + + alloc_reg(pc, r); + if (r->type != P_TEMP) { + r = temp_temp(pc, e); + emit_mov(pc, r, src); + } + + if (r->hw > (NV50_SU_MAX_TEMP / 2)) { + NOUVEAU_ERR("out of low GPRs\n"); + abort(); + } + + e->inst[pos / 32] |= ((src->hw * 2) + lh) << (pos % 32); +} + static void emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred) { @@ -1058,6 +1080,20 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, emit(pc, e); } +static void +emit_not(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xd0000000; + e->inst[1] = 0x0402c000; + set_long(pc, e); + set_dst(pc, dst, e); + set_src_1(pc, src, e); + + emit(pc, e); +} + static void emit_shift(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir) @@ -1086,6 +1122,27 @@ emit_shift(struct nv50_pc *pc, struct nv50_reg *dst, emit(pc, e); } +static void +emit_shl_imm(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, int s) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x30000000; + e->inst[1] = 0xc4100000; + if (s < 0) { + e->inst[1] |= 1 << 29; + s = -s; + } + e->inst[1] |= ((s & 0x7f) << 16); + + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + static void emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) @@ -1361,6 +1418,43 @@ emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst, emit(pc, e); } +static void +emit_mad_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1, + struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x60000000; + if (!pc->allow32) + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + alloc_reg(pc, src2); + if (is_long(e) || (src2->type != P_TEMP) || (src2->hw != dst->hw)) + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_mul_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + + emit(pc, e); +} + static void emit_sad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) @@ -1368,6 +1462,9 @@ emit_sad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e = exec(pc); e->inst[0] = 0x50000000; + if (!pc->allow32) + set_long(pc, e); + check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, e); set_src_0(pc, src0, e); set_src_1(pc, src1, e); @@ -1379,6 +1476,8 @@ emit_sad(struct nv50_pc *pc, struct nv50_reg *dst, e->inst[1] |= 0x0c << 24; else e->inst[0] |= 0x81 << 8; + + emit(pc, e); } static INLINE void @@ -1890,7 +1989,11 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) case 0x5: /* SAD */ m = ~(0x81 << 8); - q = 0x0c << 24; + q = (0x0c << 24) | ((e->inst[0] & (0x7f << 2)) << 12); + break; + case 0x6: + /* MAD u16 */ + q = (e->inst[0] & (0x7f << 2)) << 12; break; case 0x8: /* INTERP (move centroid, perspective and flat bits) */ @@ -1970,8 +2073,11 @@ get_supported_mods(const struct tgsi_full_instruction *insn, int i) case TGSI_OPCODE_IMAX: case TGSI_OPCODE_IMIN: case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_UMAD: case TGSI_OPCODE_UMAX: case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_UMUL: case TGSI_OPCODE_USHR: return NV50_MOD_I32; default: @@ -2713,6 +2819,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mul(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_NOT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_not(pc, dst[c], src[0][c]); + } + break; case TGSI_OPCODE_POW: emit_pow(pc, brdc, src[0][0], src[1][0]); break; @@ -2857,6 +2970,39 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_UMAD: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, temp, src[0][c], 0, src[1][c], 0, + temp); + emit_add_b32(pc, dst[c], temp, src[2][c]); + } + } + break; + case TGSI_OPCODE_UMUL: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, dst[c], src[0][c], 0, src[1][c], 0, + temp); + } + } + break; case TGSI_OPCODE_XPD: temp = temp_temp(pc, NULL); if (mask & (1 << 0)) { -- cgit v1.2.3 From cdc11056deaf2ea4a6b8f834f7074dce7b49d9b1 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 10 Jan 2010 11:20:11 +0000 Subject: llvmpipe: Yet another hack to get release LLVM static libraries to link in debug builds. --- src/gallium/drivers/llvmpipe/lp_bld_misc.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp index d3f78c06d9..6e79438ead 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -59,3 +59,17 @@ LLVMInitializeNativeTarget(void) #endif + + +/* + * Hack to allow the linking of release LLVM static libraries on a debug build. + * + * See also: + * - http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/7234ea2b-0042-42ed-b4e2-5d8644dfb57d + */ +#if defined(_MSC_VER) && defined(_DEBUG) +#include +extern "C" { + _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {} +} +#endif -- cgit v1.2.3 From 12576556e4aef8ae95ee93a9436e7a878f371b1c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 10 Jan 2010 18:37:07 +0000 Subject: llvmpipe: Update instructions. Explain how to build for windows. Both MSVC 9 and cross MinGW supported. Stop documenting LLVM 2.5 as supported. It still supported at the moment but it will soon stop being. --- src/gallium/drivers/llvmpipe/README | 39 +++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 0c3f00fd58..72d9f39658 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -59,27 +59,16 @@ Requirements See /proc/cpuinfo to know what your CPU supports. - - LLVM 2.5 or greater. LLVM 2.6 is preferred. + - LLVM 2.6. - On Debian based distributions do: + For Linux, on a recent Debian based distribution do: aptitude install llvm-dev - There is a typo in one of the llvm 2.5 headers, that may cause compilation - errors. To fix it apply the change: - - --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100 - +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100 - @@ -831,7 +831,7 @@ - template - inline T **unwrap(LLVMValueRef *Vals, unsigned Length) { - #if DEBUG - - for (LLVMValueRef *I = Vals, E = Vals + Length; I != E; ++I) - + for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I) - cast(*I); - #endif - return reinterpret_cast(Vals); - + For Windows download pre-built MSVC 9.0 or MinGW binaries from + http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment + variable to the extracted path. + - scons (optional) - udis86, http://udis86.sourceforge.net/ (optional): @@ -95,9 +84,9 @@ Requirements Building ======== -To build everything invoke scons as: +To build everything on Linux invoke scons as: - scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false -k + scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=xlib dri=false Alternatively, you can build it with GNU make, if you prefer, by invoking it as @@ -105,12 +94,15 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as but the rest of these instructions assume that scons is used. +For windows is everything the except except the winsys: + + scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=gdi dri=false Using ===== -Building will create a drop-in alternative for libGL.so. To use it set the -environment variables: +On Linux, building will create a drop-in alternative for libGL.so. To use it +set the environment variables: export LD_LIBRARY_PATH=$PWD/build/linux-x86_64-debug/lib:$LD_LIBRARY_PATH @@ -121,6 +113,11 @@ or For performance evaluation pass debug=no to scons, and use the corresponding lib directory without the "-debug" suffix. +On Windows, building will create a drop-in alternative for opengl32.dll. To use +it put it in the same directory as the application. It can also be used by +replacing the native ICD driver, but it's quite an advanced usage, so if you +need to ask, don't even try it. + Unit testing ============ -- cgit v1.2.3 From d496399156a7d9a683999e41f126232f89375f2b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 09:35:55 -0800 Subject: r300g: Start using atoms. No benefits yet. --- src/gallium/drivers/r300/r300_blit.c | 2 +- src/gallium/drivers/r300/r300_context.c | 15 +++++++++++++++ src/gallium/drivers/r300/r300_context.h | 14 ++++++++++++-- src/gallium/drivers/r300/r300_emit.c | 14 +++++++++----- src/gallium/drivers/r300/r300_emit.h | 2 +- src/gallium/drivers/r300/r300_state.c | 7 ++++--- 6 files changed, 42 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ffe066d536..6c6e0567a4 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -27,7 +27,7 @@ static void r300_blitter_save_states(struct r300_context* r300) { - util_blitter_save_blend(r300->blitter, r300->blend_state); + util_blitter_save_blend(r300->blitter, r300->blend_state.state); util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state); util_blitter_save_rasterizer(r300->blitter, r300->rs_state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d5c2d63d39..2cdc946e90 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_blit.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_flush.h" #include "r300_query.h" #include "r300_render.h" @@ -107,6 +108,18 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } +#define R300_INIT_ATOM(name) \ + r300->name##_state.state = NULL; \ + r300->name##_state.emit = r300_emit_##name##_state; \ + r300->name##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->name##_state); + +static void r300_setup_atoms(struct r300_context* r300) +{ + make_empty_list(&r300->atom_list); + R300_INIT_ATOM(blend); +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, struct radeon_winsys* radeon_winsys) { @@ -166,6 +179,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, PIPE_BUFFER_USAGE_VERTEX, 4096); make_empty_list(&r300->query_list); + r300_setup_atoms(r300); + r300_init_flush_functions(r300); r300_init_query_functions(r300); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 232530b7dc..41582504fd 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,9 +30,18 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +struct r300_context; + struct r300_fragment_shader; struct r300_vertex_shader; +struct r300_atom { + struct r300_atom *prev, *next; + void* state; + void (*emit)(struct r300_context*, void*); + boolean dirty; +}; + struct r300_blend_state { uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ @@ -135,7 +144,6 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_BLEND 0x00000001 #define R300_NEW_BLEND_COLOR 0x00000002 #define R300_NEW_CLIP 0x00000004 #define R300_NEW_DSA 0x00000008 @@ -273,8 +281,10 @@ struct r300_context { struct r300_vertex_info* vertex_info; /* Various CSO state objects. */ + /* Beginning of atom list. */ + struct r300_atom atom_list; /* Blend state. */ - struct r300_blend_state* blend_state; + struct r300_atom blend_state; /* Blend color state. */ struct r300_blend_color_state* blend_color_state; /* User clip planes. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index f8bfa714fe..8c9c7e9d03 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_simple_list.h" #include "r300_context.h" #include "r300_cs.h" @@ -36,9 +37,9 @@ #include "r300_texture.h" #include "r300_vs.h" -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend) +void r300_emit_blend_state(struct r300_context* r300, void* state) { + struct r300_blend_state* blend = (struct r300_blend_state*)state; CS_LOCALS(r300); BEGIN_CS(8); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); @@ -978,6 +979,7 @@ void r300_emit_dirty_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_texture* tex; + struct r300_atom* atom; int i, dirty_tex = 0; boolean invalid = FALSE; @@ -1060,9 +1062,11 @@ validate: r300->dirty_state &= ~R300_NEW_QUERY; } - if (r300->dirty_state & R300_NEW_BLEND) { - r300_emit_blend_state(r300, r300->blend_state); - r300->dirty_state &= ~R300_NEW_BLEND; + foreach(atom, &r300->atom_list) { + if (atom->dirty) { + atom->emit(r300, atom->state); + atom->dirty = FALSE; + } } if (r300->dirty_state & R300_NEW_BLEND_COLOR) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 3797d3d332..34356438e4 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -32,7 +32,7 @@ struct r300_vertex_program_code; void r300_emit_aos(struct r300_context* r300, unsigned offset); void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend); + void* blend); void r300_emit_blend_color_state(struct r300_context* r300, struct r300_blend_color_state* bc); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index a145a7f18a..db8aca6c9f 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -317,8 +317,8 @@ static void r300_bind_blend_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300->blend_state = (struct r300_blend_state*)state; - r300->dirty_state |= R300_NEW_BLEND; + r300->blend_state.state = state; + r300->blend_state.dirty = TRUE; } /* Free blend state. */ @@ -521,8 +521,9 @@ static void r300->dirty_state |= R300_NEW_SCISSOR; } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; - r300->dirty_state |= R300_NEW_BLEND; r300->dirty_state |= R300_NEW_DSA; + + r300->blend_state.dirty = TRUE; } /* Create fragment shader state. */ -- cgit v1.2.3 From 9d3db601c782805113e60ee7f6976184f2786427 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 10:26:15 -0800 Subject: r300g: Atomize blend color. --- src/gallium/drivers/r300/r300_context.c | 5 +++-- src/gallium/drivers/r300/r300_context.h | 3 +-- src/gallium/drivers/r300/r300_emit.c | 9 ++------- src/gallium/drivers/r300/r300_emit.h | 6 ++---- src/gallium/drivers/r300/r300_state.c | 10 ++++++---- 5 files changed, 14 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 2cdc946e90..d16889de34 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -70,7 +70,7 @@ static void r300_destroy_context(struct pipe_context* context) FREE(query); } - FREE(r300->blend_color_state); + FREE(r300->blend_color_state.state); FREE(r300->rs_block); FREE(r300->scissor_state); FREE(r300->vertex_info); @@ -118,6 +118,7 @@ static void r300_setup_atoms(struct r300_context* r300) { make_empty_list(&r300->atom_list); R300_INIT_ATOM(blend); + R300_INIT_ATOM(blend_color); } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -168,7 +169,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, r300_shader_key_compare); - r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); + r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 41582504fd..c916a860f6 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -144,7 +144,6 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_BLEND_COLOR 0x00000002 #define R300_NEW_CLIP 0x00000004 #define R300_NEW_DSA 0x00000008 #define R300_NEW_FRAMEBUFFERS 0x00000010 @@ -286,7 +285,7 @@ struct r300_context { /* Blend state. */ struct r300_atom blend_state; /* Blend color state. */ - struct r300_blend_color_state* blend_color_state; + struct r300_atom blend_color_state; /* User clip planes. */ struct pipe_clip_state clip_state; /* Shader constants. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 8c9c7e9d03..5ae9c2a9bd 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -58,9 +58,9 @@ void r300_emit_blend_state(struct r300_context* r300, void* state) END_CS; } -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc) +void r300_emit_blend_color_state(struct r300_context* r300, void* state) { + struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -1069,11 +1069,6 @@ validate: } } - if (r300->dirty_state & R300_NEW_BLEND_COLOR) { - r300_emit_blend_color_state(r300, r300->blend_color_state); - r300->dirty_state &= ~R300_NEW_BLEND_COLOR; - } - if (r300->dirty_state & R300_NEW_CLIP) { r300_emit_clip_state(r300, &r300->clip_state); r300->dirty_state &= ~R300_NEW_CLIP; diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 34356438e4..005a9d50b0 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,11 +31,9 @@ struct r300_vertex_program_code; void r300_emit_aos(struct r300_context* r300, unsigned offset); -void r300_emit_blend_state(struct r300_context* r300, - void* blend); +void r300_emit_blend_state(struct r300_context* r300, void* state); -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc); +void r300_emit_blend_color_state(struct r300_context* r300, void* state); void r300_emit_clip_state(struct r300_context* r300, struct pipe_clip_state* clip); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index db8aca6c9f..35d698b820 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -340,20 +340,22 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_blend_color_state* state = + (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); - r300->blend_color_state->blend_color = uc.ui; + state->blend_color = uc.ui; /* XXX if FP16 blending is enabled, we should use the FP16 format */ - r300->blend_color_state->blend_color_red_alpha = + state->blend_color_red_alpha = float_to_fixed10(color->color[0]) | (float_to_fixed10(color->color[3]) << 16); - r300->blend_color_state->blend_color_green_blue = + state->blend_color_green_blue = float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); - r300->dirty_state |= R300_NEW_BLEND_COLOR; + r300->blend_color_state.dirty = TRUE; } static void r300_set_clip_state(struct pipe_context* pipe, -- cgit v1.2.3 From 249374b85346663417fc09e4c1ad3c6fb908067b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 10:36:57 -0800 Subject: r300g: Atomize UCP. Meh. --- src/gallium/drivers/r300/r300_context.c | 3 +++ src/gallium/drivers/r300/r300_context.h | 3 +-- src/gallium/drivers/r300/r300_emit.c | 9 ++------- src/gallium/drivers/r300/r300_emit.h | 3 +-- src/gallium/drivers/r300/r300_state.c | 4 ++-- 5 files changed, 9 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d16889de34..489f701e46 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -71,6 +71,7 @@ static void r300_destroy_context(struct pipe_context* context) } FREE(r300->blend_color_state.state); + FREE(r300->clip_state.state); FREE(r300->rs_block); FREE(r300->scissor_state); FREE(r300->vertex_info); @@ -119,6 +120,7 @@ static void r300_setup_atoms(struct r300_context* r300) make_empty_list(&r300->atom_list); R300_INIT_ATOM(blend); R300_INIT_ATOM(blend_color); + R300_INIT_ATOM(clip); } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -170,6 +172,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_shader_key_compare); r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); + r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index c916a860f6..3a54659df4 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -144,7 +144,6 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_CLIP 0x00000004 #define R300_NEW_DSA 0x00000008 #define R300_NEW_FRAMEBUFFERS 0x00000010 #define R300_NEW_FRAGMENT_SHADER 0x00000020 @@ -287,7 +286,7 @@ struct r300_context { /* Blend color state. */ struct r300_atom blend_color_state; /* User clip planes. */ - struct pipe_clip_state clip_state; + struct r300_atom clip_state; /* Shader constants. */ struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES]; /* Depth, stencil, and alpha state. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 5ae9c2a9bd..1517eed923 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -77,9 +77,9 @@ void r300_emit_blend_color_state(struct r300_context* r300, void* state) } } -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip) +void r300_emit_clip_state(struct r300_context* r300, void* state) { + struct pipe_clip_state* clip = (struct pipe_clip_state*)state; int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -1069,11 +1069,6 @@ validate: } } - if (r300->dirty_state & R300_NEW_CLIP) { - r300_emit_clip_state(r300, &r300->clip_state); - r300->dirty_state &= ~R300_NEW_CLIP; - } - if (r300->dirty_state & R300_NEW_DSA) { r300_emit_dsa_state(r300, r300->dsa_state); r300->dirty_state &= ~R300_NEW_DSA; diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 005a9d50b0..81b5f735ef 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -35,8 +35,7 @@ void r300_emit_blend_state(struct r300_context* r300, void* state); void r300_emit_blend_color_state(struct r300_context* r300, void* state); -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip); +void r300_emit_clip_state(struct r300_context* r300, void* state); void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 35d698b820..732292fdaf 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -364,8 +364,8 @@ static void r300_set_clip_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); if (r300_screen(pipe->screen)->caps->has_tcl) { - r300->clip_state = *state; - r300->dirty_state |= R300_NEW_CLIP; + memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); + r300->clip_state.dirty = TRUE; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); -- cgit v1.2.3 From 07ea7e6c80ef2bcb69ab12af69d27f7e118bc15a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 11:08:14 -0800 Subject: r300g: Atomize DSA. Also a bit of ztop. --- src/gallium/drivers/r300/r300_blit.c | 2 +- src/gallium/drivers/r300/r300_context.c | 1 + src/gallium/drivers/r300/r300_context.h | 3 +-- src/gallium/drivers/r300/r300_emit.c | 13 ++---------- src/gallium/drivers/r300/r300_emit.h | 3 +-- src/gallium/drivers/r300/r300_state.c | 6 +++--- src/gallium/drivers/r300/r300_state_derived.c | 29 +++++++++++++++------------ 7 files changed, 25 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 6c6e0567a4..b2fec4a50b 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -28,7 +28,7 @@ static void r300_blitter_save_states(struct r300_context* r300) { util_blitter_save_blend(r300->blitter, r300->blend_state.state); - util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state); + util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state); util_blitter_save_rasterizer(r300->blitter, r300->rs_state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); util_blitter_save_vertex_shader(r300->blitter, r300->vs); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 489f701e46..81a14c02b2 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -121,6 +121,7 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(blend); R300_INIT_ATOM(blend_color); R300_INIT_ATOM(clip); + R300_INIT_ATOM(dsa); } struct pipe_context* r300_create_context(struct pipe_screen* screen, diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 3a54659df4..1445ba5699 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -144,7 +144,6 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_DSA 0x00000008 #define R300_NEW_FRAMEBUFFERS 0x00000010 #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 @@ -290,7 +289,7 @@ struct r300_context { /* Shader constants. */ struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES]; /* Depth, stencil, and alpha state. */ - struct r300_dsa_state* dsa_state; + struct r300_atom dsa_state; /* Fragment shader. */ struct r300_fragment_shader* fs; /* Framebuffer state. We currently don't need our own version of this. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 1517eed923..852ac98863 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -107,9 +107,9 @@ void r300_emit_clip_state(struct r300_context* r300, void* state) } -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa) +void r300_emit_dsa_state(struct r300_context* r300, void* state) { + struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -983,10 +983,6 @@ void r300_emit_dirty_state(struct r300_context* r300) int i, dirty_tex = 0; boolean invalid = FALSE; - if (!(r300->dirty_state)) { - return; - } - /* Check size of CS. */ /* Make sure we have at least 8*1024 spare dwords. */ /* XXX It would be nice to know the number of dwords we really need to @@ -1069,11 +1065,6 @@ validate: } } - if (r300->dirty_state & R300_NEW_DSA) { - r300_emit_dsa_state(r300, r300->dsa_state); - r300->dirty_state &= ~R300_NEW_DSA; - } - if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { r300_emit_fragment_depth_config(r300, r300->fs); if (r300screen->caps->is_r500) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 81b5f735ef..7cea50e627 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -37,8 +37,7 @@ void r300_emit_blend_color_state(struct r300_context* r300, void* state); void r300_emit_clip_state(struct r300_context* r300, void* state); -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa); +void r300_emit_dsa_state(struct r300_context* r300, void* state); void r300_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 732292fdaf..288b2149b5 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -463,8 +463,8 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300->dsa_state = (struct r300_dsa_state*)state; - r300->dirty_state |= R300_NEW_DSA; + r300->dsa_state.state = state; + r300->dsa_state.dirty = TRUE; } /* Free DSA state. */ @@ -523,9 +523,9 @@ static void r300->dirty_state |= R300_NEW_SCISSOR; } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; - r300->dirty_state |= R300_NEW_DSA; r300->blend_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; } /* Create fragment shader state. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 22660a52d9..0f6ff02d9c 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -508,7 +508,7 @@ static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) static void r300_update_ztop(struct r300_context* r300) { - r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE; + uint32_t ztop = r300->ztop_state.z_buffer_top; /* This is important enough that I felt it warranted a comment. * @@ -534,14 +534,20 @@ static void r300_update_ztop(struct r300_context* r300) */ /* ZS writes */ - if (r300_dsa_writes_depth_stencil(r300->dsa_state) && - (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */ - r300->fs->info.uses_kill)) { /* (2) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { /* (6) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */ + r300->fs->info.uses_kill)) { /* (2) */ + ztop = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ + ztop = R300_ZTOP_DISABLE; + } else if (r300->query_current) { /* (6) */ + ztop = R300_ZTOP_DISABLE; + } else { + ztop = R300_ZTOP_ENABLE; + } + + if (r300->ztop_state.z_buffer_top != ztop) { + r300->ztop_state.z_buffer_top = ztop; } } @@ -553,8 +559,5 @@ void r300_update_derived_state(struct r300_context* r300) r300_update_derived_shader_state(r300); } - if (r300->dirty_state & - (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) { - r300_update_ztop(r300); - } + r300_update_ztop(r300); } -- cgit v1.2.3 From 8a2c961798b4ab1f1095f14d814242422020d4f9 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 11:49:25 -0800 Subject: r300g: Atomize ZTOP. Also do state-change checks. ZTOP's too important to not check. --- src/gallium/drivers/r300/r300_context.c | 7 +++++-- src/gallium/drivers/r300/r300_context.h | 2 +- src/gallium/drivers/r300/r300_emit.c | 13 +++++++++++-- src/gallium/drivers/r300/r300_emit.h | 2 ++ src/gallium/drivers/r300/r300_state_derived.c | 9 ++++++--- 5 files changed, 25 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 81a14c02b2..abd5d26874 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -76,6 +76,7 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->scissor_state); FREE(r300->vertex_info); FREE(r300->viewport_state); + FREE(r300->ztop_state.state); FREE(r300); } @@ -118,6 +119,7 @@ static void r300_flush_cb(void *data) static void r300_setup_atoms(struct r300_context* r300) { make_empty_list(&r300->atom_list); + R300_INIT_ATOM(ztop); R300_INIT_ATOM(blend); R300_INIT_ATOM(blend_color); R300_INIT_ATOM(clip); @@ -172,20 +174,21 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, r300_shader_key_compare); + r300_setup_atoms(r300); + r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); + r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, PIPE_BUFFER_USAGE_VERTEX, 4096); make_empty_list(&r300->query_list); - r300_setup_atoms(r300); - r300_init_flush_functions(r300); r300_init_query_functions(r300); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 1445ba5699..56c49f7efa 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -311,7 +311,7 @@ struct r300_context { /* Viewport state. */ struct r300_viewport_state* viewport_state; /* ZTOP state. */ - struct r300_ztop_state ztop_state; + struct r300_atom ztop_state; /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 852ac98863..27fb9aa9ba 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -113,7 +113,7 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8); + BEGIN_CS(r300screen->caps->is_r500 ? 8 : 6); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); /* not needed since we use the 8bit alpha ref */ @@ -132,7 +132,6 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) } OUT_CS(dsa->stencil_ref_mask); - OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top); /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { @@ -956,6 +955,16 @@ void r300_emit_texture_count(struct r300_context* r300) } +void r300_emit_ztop_state(struct r300_context* r300, void* state) +{ + struct r300_ztop_state* ztop = (struct r300_ztop_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top); + END_CS; +} + void r300_flush_textures(struct r300_context* r300) { CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 7cea50e627..0ccae0031b 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -90,6 +90,8 @@ void r300_emit_viewport_state(struct r300_context* r300, void r300_emit_texture_count(struct r300_context* r300); +void r300_emit_ztop_state(struct r300_context* r300, void* state); + void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 0f6ff02d9c..a4029fca49 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -508,7 +508,9 @@ static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) static void r300_update_ztop(struct r300_context* r300) { - uint32_t ztop = r300->ztop_state.z_buffer_top; + struct r300_ztop_state* ztop_state = + (struct r300_ztop_state*)r300->ztop_state.state; + uint32_t ztop = ztop_state->z_buffer_top; /* This is important enough that I felt it warranted a comment. * @@ -546,8 +548,9 @@ static void r300_update_ztop(struct r300_context* r300) ztop = R300_ZTOP_ENABLE; } - if (r300->ztop_state.z_buffer_top != ztop) { - r300->ztop_state.z_buffer_top = ztop; + if (ztop_state->z_buffer_top != ztop) { + ztop_state->z_buffer_top = ztop; + r300->ztop_state.dirty = TRUE; } } -- cgit v1.2.3 From 46fafdd455bc1f3ee05c076b3c8c541ecd4132dc Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 14:29:32 -0800 Subject: r300g: Atomize rasterizer. I want to stab things now. --- src/gallium/drivers/r300/r300_blit.c | 2 +- src/gallium/drivers/r300/r300_context.c | 1 + src/gallium/drivers/r300/r300_context.h | 3 +-- src/gallium/drivers/r300/r300_emit.c | 36 ++++++++++++++------------- src/gallium/drivers/r300/r300_emit.h | 2 +- src/gallium/drivers/r300/r300_render.c | 7 +++--- src/gallium/drivers/r300/r300_state.c | 13 +++++----- src/gallium/drivers/r300/r300_state_derived.c | 2 +- 8 files changed, 34 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index b2fec4a50b..c14414fff6 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -29,7 +29,7 @@ static void r300_blitter_save_states(struct r300_context* r300) { util_blitter_save_blend(r300->blitter, r300->blend_state.state); util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state); - util_blitter_save_rasterizer(r300->blitter, r300->rs_state); + util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); util_blitter_save_vertex_shader(r300->blitter, r300->vs); } diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index abd5d26874..9319b5ecfc 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -124,6 +124,7 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(blend_color); R300_INIT_ATOM(clip); R300_INIT_ATOM(dsa); + R300_INIT_ATOM(rs); } struct pipe_context* r300_create_context(struct pipe_screen* screen, diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 56c49f7efa..e8c56bbf87 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -147,7 +147,6 @@ struct r300_ztop_state { #define R300_NEW_FRAMEBUFFERS 0x00000010 #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_RASTERIZER 0x00000080 #define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 #define R300_ANY_NEW_SAMPLERS 0x0001fe00 @@ -295,7 +294,7 @@ struct r300_context { /* Framebuffer state. We currently don't need our own version of this. */ struct pipe_framebuffer_state framebuffer_state; /* Rasterizer state. */ - struct r300_rs_state* rs_state; + struct r300_atom rs_state; /* RS block state. */ struct r300_rs_block* rs_block; /* Sampler states. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 27fb9aa9ba..60e4a109d4 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -145,6 +145,9 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state; + boolean vte_enabled = viewport->vte_control & ~R300_VTX_W0_FMT; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -167,16 +170,17 @@ static const float * get_shader_constant( /* Texture compare-fail value. */ /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient, - * this is always (0,0,0,0). */ + * this is always (0,0,0,0), right? */ case RC_STATE_SHADOW_AMBIENT: vec[3] = 0; break; case RC_STATE_R300_VIEWPORT_SCALE: - if (r300->rs_state->enable_vte) { - vec[0] = r300->viewport_state->xscale; - vec[1] = r300->viewport_state->yscale; - vec[2] = r300->viewport_state->zscale; + /* XXX argfl stop crossing state */ + if (vte_enabled) { + vec[0] = viewport->xscale; + vec[1] = viewport->yscale; + vec[2] = viewport->zscale; } else { vec[0] = 1; vec[1] = 1; @@ -185,10 +189,10 @@ static const float * get_shader_constant( break; case RC_STATE_R300_VIEWPORT_OFFSET: - if (r300->rs_state->enable_vte) { - vec[0] = r300->viewport_state->xoffset; - vec[1] = r300->viewport_state->yoffset; - vec[2] = r300->viewport_state->zoffset; + if (vte_enabled) { + vec[0] = viewport->xoffset; + vec[1] = viewport->yoffset; + vec[2] = viewport->zoffset; } else { /* Zeros. */ } @@ -576,8 +580,9 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_finish(r300, query); } -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) +void r300_emit_rs_state(struct r300_context* r300, void* state) { + struct r300_rs_state* rs = (struct r300_rs_state*)state; CS_LOCALS(r300); BEGIN_CS(22); @@ -655,7 +660,8 @@ static void r300_emit_scissor_regs(struct r300_context* r300, void r300_emit_scissor_state(struct r300_context* r300, struct r300_scissor_state* scissor) { - if (r300->rs_state->rs.scissor) { + /* XXX argfl! */ + if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { r300_emit_scissor_regs(r300, &scissor->scissor); } else { r300_emit_scissor_regs(r300, &scissor->framebuffer); @@ -926,7 +932,8 @@ void r300_emit_viewport_state(struct r300_context* r300, OUT_CS_32F(viewport->zscale); OUT_CS_32F(viewport->zoffset); - if (r300->rs_state->enable_vte) { + /* XXX words fail me. */ + if (((struct r300_rs_state*)r300->rs_state.state)->enable_vte) { OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); } else { OUT_CS_REG(R300_VAP_VTE_CNTL, 0); @@ -1100,11 +1107,6 @@ validate: r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; } - if (r300->dirty_state & R300_NEW_RASTERIZER) { - r300_emit_rs_state(r300, r300->rs_state); - r300->dirty_state &= ~R300_NEW_RASTERIZER; - } - if (r300->dirty_state & R300_NEW_RS_BLOCK) { r300_emit_rs_block_state(r300, r300->rs_block); r300->dirty_state &= ~R300_NEW_RS_BLOCK; diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 0ccae0031b..a6539b218b 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -59,7 +59,7 @@ void r300_emit_query_begin(struct r300_context* r300, void r300_emit_query_end(struct r300_context* r300); -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); +void r300_emit_rs_state(struct r300_context* r300, void* state); void r300_emit_rs_block_state(struct r300_context* r300, struct r300_rs_block* rs); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index a4ac9ad9a7..7098f66f0c 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -71,14 +71,15 @@ uint32_t r300_translate_primitive(unsigned prim) static boolean r300_nothing_to_draw(struct r300_context *r300) { - return r300->rs_state->rs.scissor && + return ((struct r300_rs_state*)r300->rs_state.state)->rs.scissor && r300->scissor_state->scissor.empty_area; } static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, unsigned mode) { - uint32_t color_control = r300->rs_state->color_control; + struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state; + uint32_t color_control = rs->color_control; /* By default (see r300_state.c:r300_create_rs_state) color_control is * initialized to provoking the first vertex. @@ -98,7 +99,7 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, * ~ C. */ - if (r300->rs_state->rs.flatshade_first) { + if (rs->rs.flatshade_first) { switch (mode) { case PIPE_PRIM_TRIANGLE_FAN: color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 288b2149b5..8740a082b9 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -519,7 +519,8 @@ static void r300_screen(r300->context.screen)->caps->is_r500); /* Don't rely on the order of states being set for the first time. */ - if (!r300->rs_state || !r300->rs_state->rs.scissor) { + /* XXX ( >&) */ + if (!r300->rs_state.state) { r300->dirty_state |= R300_NEW_SCISSOR; } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; @@ -721,9 +722,10 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->rs_state = rs; + r300->rs_state.state = rs; + r300->rs_state.dirty = TRUE; + /* XXX Clean these up when we move to atom emits */ - r300->dirty_state |= R300_NEW_RASTERIZER; r300->dirty_state |= R300_NEW_RS_BLOCK; r300->dirty_state |= R300_NEW_SCISSOR; r300->dirty_state |= R300_NEW_VIEWPORT; @@ -868,10 +870,7 @@ static void r300_set_scissor_state(struct pipe_context* pipe, r300_set_scissor_regs(state, &r300->scissor_state->scissor, r300_screen(r300->context.screen)->caps->is_r500); - /* Don't rely on the order of states being set for the first time. */ - if (!r300->rs_state || r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } + r300->dirty_state |= R300_NEW_SCISSOR; } static void r300_set_viewport_state(struct pipe_context* pipe, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index a4029fca49..e82aa07a62 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -139,7 +139,7 @@ static void r300_vertex_psc(struct r300_context* r300) /* If TCL is bypassed, map vertex streams to equivalent VS output * locations. */ - if (r300->rs_state->enable_vte) { + if (((struct r300_rs_state*)r300->rs_state.state)->enable_vte) { stream_tab = identity; } else { stream_tab = r300->vs->stream_loc_notcl; -- cgit v1.2.3 From 7b569bef1571d32e00898337399da62a6ed7d0fe Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 17:42:06 -0800 Subject: r300g: Don't avoid rewriting ZTOP. Simplify the code. Added a comment to keep me from doing it again. --- src/gallium/drivers/r300/r300_state_derived.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index e82aa07a62..4396978fd1 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -510,7 +510,6 @@ static void r300_update_ztop(struct r300_context* r300) { struct r300_ztop_state* ztop_state = (struct r300_ztop_state*)r300->ztop_state.state; - uint32_t ztop = ztop_state->z_buffer_top; /* This is important enough that I felt it warranted a comment. * @@ -532,6 +531,10 @@ static void r300_update_ztop(struct r300_context* r300) * 5) Depth writes in fragment shader * 6) Outstanding occlusion queries * + * This register causes stalls all the way from SC to CB when changed, + * but it is buffered on-chip so it does not hurt to write it if it has + * not changed. + * * ~C. */ @@ -539,19 +542,16 @@ static void r300_update_ztop(struct r300_context* r300) if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */ r300->fs->info.uses_kill)) { /* (2) */ - ztop = R300_ZTOP_DISABLE; + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ - ztop = R300_ZTOP_DISABLE; + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; } else if (r300->query_current) { /* (6) */ - ztop = R300_ZTOP_DISABLE; + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; } else { - ztop = R300_ZTOP_ENABLE; + ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } - if (ztop_state->z_buffer_top != ztop) { - ztop_state->z_buffer_top = ztop; - r300->ztop_state.dirty = TRUE; - } + r300->ztop_state.dirty = TRUE; } void r300_update_derived_state(struct r300_context* r300) -- cgit v1.2.3 From e7d760ff0974aa6eb53fc43ec8f796b4e2410365 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 17:51:40 -0800 Subject: r300g: Move ROPCNTL to the top of the emit order. According to the docs, this decreases stalls, and indeed we get a tiny bit more glxgears from it. --- src/gallium/drivers/r300/r300_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 60e4a109d4..cfeb99a0ad 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -42,6 +42,7 @@ void r300_emit_blend_state(struct r300_context* r300, void* state) struct r300_blend_state* blend = (struct r300_blend_state*)state; CS_LOCALS(r300); BEGIN_CS(8); + OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (r300->framebuffer_state.nr_cbufs) { OUT_CS(blend->blend_control); @@ -53,7 +54,6 @@ void r300_emit_blend_state(struct r300_context* r300, void* state) OUT_CS(0); /* XXX also disable fastfill here once it's supported */ } - OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); END_CS; } -- cgit v1.2.3 From 47f59cfc8eba6574c9ca0ae8799e8fbd9b393fb7 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 18:07:28 -0800 Subject: r300g: Atomize viewport. Goddammit, some of these hax are really annoying. --- src/gallium/drivers/r300/r300_context.c | 5 +++-- src/gallium/drivers/r300/r300_context.h | 3 +-- src/gallium/drivers/r300/r300_emit.c | 13 ++++--------- src/gallium/drivers/r300/r300_emit.h | 3 +-- src/gallium/drivers/r300/r300_state.c | 32 +++++++++++++++++--------------- 5 files changed, 26 insertions(+), 30 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 9319b5ecfc..f003451083 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -75,7 +75,7 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->rs_block); FREE(r300->scissor_state); FREE(r300->vertex_info); - FREE(r300->viewport_state); + FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300); } @@ -125,6 +125,7 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(clip); R300_INIT_ATOM(dsa); R300_INIT_ATOM(rs); + R300_INIT_ATOM(viewport); } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -182,7 +183,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); - r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); + r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); /* Open up the OQ BO. */ diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index e8c56bbf87..70b381fe74 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -156,7 +156,6 @@ struct r300_ztop_state { #define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 -#define R300_NEW_VIEWPORT 0x20000000 #define R300_NEW_QUERY 0x40000000 #define R300_NEW_KITCHEN_SINK 0x7fffffff @@ -308,7 +307,7 @@ struct r300_context { /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ - struct r300_viewport_state* viewport_state; + struct r300_atom viewport_state; /* ZTOP state. */ struct r300_atom ztop_state; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index cfeb99a0ad..aa21682e4b 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -146,7 +146,7 @@ static const float * get_shader_constant( struct r300_constant_buffer * externals) { struct r300_viewport_state* viewport = - (struct r300_viewport_state*)r300->viewport_state; + (struct r300_viewport_state*)r300->viewport_state.state; boolean vte_enabled = viewport->vte_control & ~R300_VTX_W0_FMT; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -918,9 +918,9 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport) +void r300_emit_viewport_state(struct r300_context* r300, void* state) { + struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); BEGIN_CS(9); @@ -932,7 +932,7 @@ void r300_emit_viewport_state(struct r300_context* r300, OUT_CS_32F(viewport->zscale); OUT_CS_32F(viewport->zoffset); - /* XXX words fail me. */ + /* XXX words still fail me. */ if (((struct r300_rs_state*)r300->rs_state.state)->enable_vte) { OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); } else { @@ -1138,11 +1138,6 @@ validate: r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES); } - if (r300->dirty_state & R300_NEW_VIEWPORT) { - r300_emit_viewport_state(r300, r300->viewport_state); - r300->dirty_state &= ~R300_NEW_VIEWPORT; - } - if (dirty_tex) { r300_flush_textures(r300); } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index a6539b218b..1a76de0da6 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -85,8 +85,7 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport); +void r300_emit_viewport_state(struct r300_context* r300, void* state); void r300_emit_texture_count(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 8740a082b9..4c0b307ea8 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -724,11 +724,11 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->rs_state.state = rs; r300->rs_state.dirty = TRUE; + r300->viewport_state.dirty = TRUE; /* XXX */ /* XXX Clean these up when we move to atom emits */ r300->dirty_state |= R300_NEW_RS_BLOCK; r300->dirty_state |= R300_NEW_SCISSOR; - r300->dirty_state |= R300_NEW_VIEWPORT; if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -877,36 +877,38 @@ static void r300_set_viewport_state(struct pipe_context* pipe, const struct pipe_viewport_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; /* Do the transform in HW. */ - r300->viewport_state->vte_control = R300_VTX_W0_FMT; + viewport->vte_control = R300_VTX_W0_FMT; if (state->scale[0] != 1.0f) { - r300->viewport_state->xscale = state->scale[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; + viewport->xscale = state->scale[0]; + viewport->vte_control |= R300_VPORT_X_SCALE_ENA; } if (state->scale[1] != 1.0f) { - r300->viewport_state->yscale = state->scale[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; + viewport->yscale = state->scale[1]; + viewport->vte_control |= R300_VPORT_Y_SCALE_ENA; } if (state->scale[2] != 1.0f) { - r300->viewport_state->zscale = state->scale[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; + viewport->zscale = state->scale[2]; + viewport->vte_control |= R300_VPORT_Z_SCALE_ENA; } if (state->translate[0] != 0.0f) { - r300->viewport_state->xoffset = state->translate[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA; + viewport->xoffset = state->translate[0]; + viewport->vte_control |= R300_VPORT_X_OFFSET_ENA; } if (state->translate[1] != 0.0f) { - r300->viewport_state->yoffset = state->translate[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA; + viewport->yoffset = state->translate[1]; + viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA; } if (state->translate[2] != 0.0f) { - r300->viewport_state->zoffset = state->translate[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA; + viewport->zoffset = state->translate[2]; + viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA; } - r300->dirty_state |= R300_NEW_VIEWPORT; + r300->viewport_state.dirty = TRUE; if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; } -- cgit v1.2.3 From 8ca491386d0fb9e675e7dfbdd05bc09af74d75d3 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 22:32:58 -0800 Subject: r300g: Move TCL bypass switch to main context. --- src/gallium/drivers/r300/r300_context.h | 7 ++--- src/gallium/drivers/r300/r300_emit.c | 44 ++++++++++++--------------- src/gallium/drivers/r300/r300_state.c | 7 +++-- src/gallium/drivers/r300/r300_state_derived.c | 9 +++--- 4 files changed, 31 insertions(+), 36 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 70b381fe74..05ea7ad0f9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -71,11 +71,6 @@ struct r300_rs_state { /* Draw-specific rasterizer state */ struct pipe_rasterizer_state rs; - /* Whether or not to enable the VTE. This is referenced at the very - * last moment during emission of VTE state, to decide whether or not - * the VTE should be used for transformation. */ - boolean enable_vte; - uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ @@ -322,6 +317,8 @@ struct r300_context { uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; + /* Whether the TCL engine should be in bypass mode. */ + boolean tcl_bypass; /** Combination of DBG_xxx flags */ unsigned debug; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index aa21682e4b..86d4000d08 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -147,7 +147,6 @@ static const float * get_shader_constant( { struct r300_viewport_state* viewport = (struct r300_viewport_state*)r300->viewport_state.state; - boolean vte_enabled = viewport->vte_control & ~R300_VTX_W0_FMT; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -176,25 +175,22 @@ static const float * get_shader_constant( break; case RC_STATE_R300_VIEWPORT_SCALE: - /* XXX argfl stop crossing state */ - if (vte_enabled) { - vec[0] = viewport->xscale; - vec[1] = viewport->yscale; - vec[2] = viewport->zscale; - } else { + if (r300->tcl_bypass) { vec[0] = 1; vec[1] = 1; vec[2] = 1; + } else { + vec[0] = viewport->xscale; + vec[1] = viewport->yscale; + vec[2] = viewport->zscale; } break; case RC_STATE_R300_VIEWPORT_OFFSET: - if (vte_enabled) { + if (!r300->tcl_bypass) { vec[0] = viewport->xoffset; vec[1] = viewport->yoffset; vec[2] = viewport->zoffset; - } else { - /* Zeros. */ } break; @@ -923,22 +919,22 @@ void r300_emit_viewport_state(struct r300_context* r300, void* state) struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); - BEGIN_CS(9); - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); - - /* XXX words still fail me. */ - if (((struct r300_rs_state*)r300->rs_state.state)->enable_vte) { - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); - } else { + if (r300->tcl_bypass) { + BEGIN_CS(2); OUT_CS_REG(R300_VAP_VTE_CNTL, 0); + END_CS; + } else { + BEGIN_CS(9); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + END_CS; } - END_CS; } void r300_emit_texture_count(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 4c0b307ea8..da1f40c8ef 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -605,8 +605,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, /* Copy rasterizer state for Draw. */ rs->rs = *state; - rs->enable_vte = !state->bypass_vs_clip_and_viewport; - #ifdef PIPE_ARCH_LITTLE_ENDIAN rs->vap_control_status = R300_VC_NO_SWAP; #else @@ -722,9 +720,12 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + r300->rs_state.state = rs; r300->rs_state.dirty = TRUE; - r300->viewport_state.dirty = TRUE; /* XXX */ + /* XXX Why is this still needed, dammit!? */ + r300->viewport_state.dirty = TRUE; /* XXX Clean these up when we move to atom emits */ r300->dirty_state |= R300_NEW_RS_BLOCK; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 4396978fd1..192846411b 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -139,10 +139,10 @@ static void r300_vertex_psc(struct r300_context* r300) /* If TCL is bypassed, map vertex streams to equivalent VS output * locations. */ - if (((struct r300_rs_state*)r300->rs_state.state)->enable_vte) { - stream_tab = identity; - } else { + if (r300->tcl_bypass) { stream_tab = r300->vs->stream_loc_notcl; + } else { + stream_tab = identity; } /* Vertex shaders have no semantics on their inputs, @@ -556,9 +556,10 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_derived_state(struct r300_context* r300) { + /* XXX */ if (r300->dirty_state & (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | - R300_NEW_VERTEX_FORMAT)) { + R300_NEW_VERTEX_FORMAT) || r300->rs_state.dirty) { r300_update_derived_shader_state(r300); } -- cgit v1.2.3 From 1e9ffb63401b7bdaf05c53e07b588128566b437d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 23:02:48 -0800 Subject: r300g: Atomize scissors. Argfl. Some of this code is so questionable. --- src/gallium/drivers/r300/r300_context.c | 5 +++-- src/gallium/drivers/r300/r300_context.h | 3 +-- src/gallium/drivers/r300/r300_emit.c | 9 ++------- src/gallium/drivers/r300/r300_emit.h | 3 +-- src/gallium/drivers/r300/r300_render.c | 2 +- src/gallium/drivers/r300/r300_state.c | 26 ++++++++++++++------------ 6 files changed, 22 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index f003451083..af95bbe789 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -73,7 +73,7 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->blend_color_state.state); FREE(r300->clip_state.state); FREE(r300->rs_block); - FREE(r300->scissor_state); + FREE(r300->scissor_state.state); FREE(r300->vertex_info); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); @@ -125,6 +125,7 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(clip); R300_INIT_ATOM(dsa); R300_INIT_ATOM(rs); + R300_INIT_ATOM(scissor); R300_INIT_ATOM(viewport); } @@ -181,7 +182,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->scissor_state.state = CALLOC_STRUCT(r300_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 05ea7ad0f9..70a04d6f44 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -145,7 +145,6 @@ struct r300_ztop_state { #define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 #define R300_ANY_NEW_SAMPLERS 0x0001fe00 -#define R300_NEW_SCISSOR 0x00020000 #define R300_NEW_TEXTURE 0x00040000 #define R300_ANY_NEW_TEXTURES 0x03fc0000 #define R300_NEW_VERTEX_FORMAT 0x04000000 @@ -295,7 +294,7 @@ struct r300_context { struct r300_sampler_state* sampler_states[8]; int sampler_count; /* Scissor state. */ - struct r300_scissor_state* scissor_state; + struct r300_atom scissor_state; /* Texture states. */ struct r300_texture* textures[8]; int texture_count; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 86d4000d08..0e5533c790 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -653,9 +653,9 @@ static void r300_emit_scissor_regs(struct r300_context* r300, END_CS; } -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { + struct r300_scissor_state* scissor = (struct r300_scissor_state*)state; /* XXX argfl! */ if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { r300_emit_scissor_regs(r300, &scissor->scissor); @@ -1108,11 +1108,6 @@ validate: r300->dirty_state &= ~R300_NEW_RS_BLOCK; } - if (r300->dirty_state & R300_NEW_SCISSOR) { - r300_emit_scissor_state(r300, r300->scissor_state); - r300->dirty_state &= ~R300_NEW_SCISSOR; - } - /* Samplers and textures are tracked separately but emitted together. */ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 1a76de0da6..05a6bfeae8 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -64,8 +64,7 @@ void r300_emit_rs_state(struct r300_context* r300, void* state); void r300_emit_rs_block_state(struct r300_context* r300, struct r300_rs_block* rs); -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor); +void r300_emit_scissor_state(struct r300_context* r300, void* state); void r300_emit_texture(struct r300_context* r300, struct r300_sampler_state* sampler, diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 7098f66f0c..2bd3acea41 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -72,7 +72,7 @@ uint32_t r300_translate_primitive(unsigned prim) static boolean r300_nothing_to_draw(struct r300_context *r300) { return ((struct r300_rs_state*)r300->rs_state.state)->rs.scissor && - r300->scissor_state->scissor.empty_area; + ((struct r300_scissor_state*)r300->scissor_state.state)->scissor.empty_area; } static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index da1f40c8ef..86773adc8d 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -504,7 +504,9 @@ static void const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct pipe_scissor_state scissor; + struct r300_scissor_state* scissor = + (struct r300_scissor_state*)r300->scissor_state.state; + struct pipe_scissor_state pscissor; if (r300->draw) { draw_flush(r300->draw); @@ -512,21 +514,19 @@ static void r300->framebuffer_state = *state; - scissor.minx = scissor.miny = 0; - scissor.maxx = state->width; - scissor.maxy = state->height; - r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer, + /* XXX Arg. This is silly. */ + pscissor.minx = pscissor.miny = 0; + pscissor.maxx = state->width; + pscissor.maxy = state->height; + r300_set_scissor_regs(&pscissor, &scissor->framebuffer, r300_screen(r300->context.screen)->caps->is_r500); /* Don't rely on the order of states being set for the first time. */ - /* XXX ( >&) */ - if (!r300->rs_state.state) { - r300->dirty_state |= R300_NEW_SCISSOR; - } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; r300->blend_state.dirty = TRUE; r300->dsa_state.dirty = TRUE; + r300->scissor_state.dirty = TRUE; } /* Create fragment shader state. */ @@ -725,11 +725,11 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->rs_state.state = rs; r300->rs_state.dirty = TRUE; /* XXX Why is this still needed, dammit!? */ + r300->scissor_state.dirty = TRUE; r300->viewport_state.dirty = TRUE; /* XXX Clean these up when we move to atom emits */ r300->dirty_state |= R300_NEW_RS_BLOCK; - r300->dirty_state |= R300_NEW_SCISSOR; if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -867,11 +867,13 @@ static void r300_set_scissor_state(struct pipe_context* pipe, const struct pipe_scissor_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_scissor_state* scissor = + (struct r300_scissor_state*)r300->scissor_state.state; - r300_set_scissor_regs(state, &r300->scissor_state->scissor, + r300_set_scissor_regs(state, &scissor->scissor, r300_screen(r300->context.screen)->caps->is_r500); - r300->dirty_state |= R300_NEW_SCISSOR; + r300->scissor_state.dirty = TRUE; } static void r300_set_viewport_state(struct pipe_context* pipe, -- cgit v1.2.3 From bfcafbe15dc98d747ba4c63305c9cf2c4cdbe573 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 10 Jan 2010 23:38:05 -0800 Subject: r300g: Don't bother testing for empty scissors. If somebody goes through that much effort, they probably intended it. So humor them. :3 --- src/gallium/drivers/r300/r300_context.h | 3 --- src/gallium/drivers/r300/r300_render.c | 22 ---------------------- src/gallium/drivers/r300/r300_state.c | 3 --- 3 files changed, 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 70a04d6f44..5937f0e2cc 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -109,9 +109,6 @@ struct r300_sampler_state { struct r300_scissor_regs { uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ - - /* Whether everything is culled by scissoring. */ - boolean empty_area; }; struct r300_scissor_state { diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 2bd3acea41..ee43421cdb 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -69,12 +69,6 @@ uint32_t r300_translate_primitive(unsigned prim) } } -static boolean r300_nothing_to_draw(struct r300_context *r300) -{ - return ((struct r300_rs_state*)r300->rs_state.state)->rs.scissor && - ((struct r300_scissor_state*)r300->scissor_state.state)->scissor.empty_area; -} - static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, unsigned mode) { @@ -236,10 +230,6 @@ void r300_draw_range_elements(struct pipe_context* pipe, return; } - if (r300_nothing_to_draw(r300)) { - return; - } - r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { @@ -289,10 +279,6 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, return; } - if (r300_nothing_to_draw(r300)) { - return; - } - r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { @@ -324,10 +310,6 @@ void r300_swtcl_draw_arrays(struct pipe_context* pipe, return; } - if (r300_nothing_to_draw(r300)) { - return; - } - for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, r300->vertex_buffer[i].buffer, @@ -369,10 +351,6 @@ void r300_swtcl_draw_range_elements(struct pipe_context* pipe, return; } - if (r300_nothing_to_draw(r300)) { - return; - } - for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, r300->vertex_buffer[i].buffer, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 86773adc8d..78764ddc98 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -494,9 +494,6 @@ static void r300_set_scissor_regs(const struct pipe_scissor_state* state, (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } - - scissor->empty_area = state->minx >= state->maxx || - state->miny >= state->maxy; } static void -- cgit v1.2.3 From 70c8d2a29724d018bacc4a68ddc61db08faea00d Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Mon, 11 Jan 2010 16:30:48 +0100 Subject: gallium: remove const qualifier from pipe_buffer argument in set_constant_buffer --- src/gallium/drivers/cell/ppu/cell_state_shader.c | 2 +- src/gallium/drivers/failover/fo_context.h | 2 +- src/gallium/drivers/failover/fo_state.c | 2 +- src/gallium/drivers/i915/i915_state.c | 2 +- src/gallium/drivers/i965/brw_pipe_shader.c | 2 +- src/gallium/drivers/identity/id_context.c | 2 +- src/gallium/drivers/llvmpipe/lp_state.h | 2 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 +- src/gallium/drivers/nv04/nv04_state.c | 2 +- src/gallium/drivers/nv10/nv10_state.c | 2 +- src/gallium/drivers/nv20/nv20_state.c | 2 +- src/gallium/drivers/nv30/nv30_state.c | 2 +- src/gallium/drivers/nv40/nv40_state.c | 2 +- src/gallium/drivers/nv50/nv50_state.c | 2 +- src/gallium/drivers/r300/r300_state.c | 2 +- src/gallium/drivers/softpipe/sp_state.h | 2 +- src/gallium/drivers/softpipe/sp_state_fs.c | 2 +- src/gallium/drivers/svga/svga_pipe_constants.c | 2 +- src/gallium/drivers/trace/tr_context.c | 2 +- src/gallium/include/pipe/p_context.h | 2 +- 20 files changed, 20 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index cf5d681499..1b09cf7f7d 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -183,7 +183,7 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf) + struct pipe_buffer *buf) { struct cell_context *cell = cell_context(pipe); diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index c8be885457..191a44c3df 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -125,7 +125,7 @@ failover_context( struct pipe_context *pipe ) void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf); + struct pipe_buffer *buf); #endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 55ccab6e98..d6ec4d1313 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -495,7 +495,7 @@ failover_set_vertex_elements(struct pipe_context *pipe, void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf) + struct pipe_buffer *buf) { struct failover_context *failover = failover_context(pipe); diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 4d48b5a0be..f7ebfadd59 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -517,7 +517,7 @@ static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) static void i915_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf) + struct pipe_buffer *buf) { struct i915_context *i915 = i915_context(pipe); struct pipe_screen *screen = pipe->screen; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index ede6b347a5..6c376e5e5d 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -255,7 +255,7 @@ static void brw_delete_vs_state( struct pipe_context *pipe, void *prog ) static void brw_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf) + struct pipe_buffer *buf) { struct brw_context *brw = brw_context(pipe); diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index fb4d91a114..b975182e7b 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -404,7 +404,7 @@ static void identity_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, - const struct pipe_buffer *_buffer) + struct pipe_buffer *_buffer) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 131e3621e9..8e0c773a63 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -155,7 +155,7 @@ void llvmpipe_set_clip_state( struct pipe_context *, void llvmpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - const struct pipe_buffer *buf); + struct pipe_buffer *buf); void *llvmpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 867e8ceea7..d129918fe2 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -713,7 +713,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *constants) + struct pipe_buffer *constants) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); unsigned size = constants ? constants->size : 0; diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index ac69b29616..871034ad0b 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -332,7 +332,7 @@ nv04_set_clip_state(struct pipe_context *pipe, static void nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf ) + struct pipe_buffer *buf ) { struct nv04_context *nv04 = nv04_context(pipe); struct pipe_screen *pscreen = pipe->screen; diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 150ab5029c..ad7def53b1 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -458,7 +458,7 @@ nv10_set_clip_state(struct pipe_context *pipe, static void nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf ) + struct pipe_buffer *buf ) { struct nv10_context *nv10 = nv10_context(pipe); struct pipe_screen *pscreen = pipe->screen; diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c index d7893b4f0f..45697a60ef 100644 --- a/src/gallium/drivers/nv20/nv20_state.c +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -451,7 +451,7 @@ nv20_set_clip_state(struct pipe_context *pipe, static void nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf ) + struct pipe_buffer *buf ) { struct nv20_context *nv20 = nv20_context(pipe); struct pipe_screen *pscreen = pipe->screen; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index dfac46e23f..268d3a8ab8 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -590,7 +590,7 @@ nv30_set_clip_state(struct pipe_context *pipe, static void nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf ) + struct pipe_buffer *buf ) { struct nv30_context *nv30 = nv30_context(pipe); diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 81ccbb5ea5..f6e5dee814 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -605,7 +605,7 @@ nv40_set_clip_state(struct pipe_context *pipe, static void nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf ) + struct pipe_buffer *buf ) { struct nv40_context *nv40 = nv40_context(pipe); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index f462558aec..d609b4cbc6 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -588,7 +588,7 @@ nv50_set_clip_state(struct pipe_context *pipe, static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf ) + struct pipe_buffer *buf ) { struct nv50_context *nv50 = nv50_context(pipe); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index b627895d4c..541b0abc9c 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -804,7 +804,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) static void r300_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf) + struct pipe_buffer *buf) { struct r300_context* r300 = r300_context(pipe); void *mapped; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 1169520b44..c41e718488 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -133,7 +133,7 @@ void softpipe_set_clip_state( struct pipe_context *, void softpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - const struct pipe_buffer *buf); + struct pipe_buffer *buf); void *softpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index fa85946c6a..eba0563c62 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -152,7 +152,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) void softpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf) + struct pipe_buffer *buf) { struct softpipe_context *softpipe = softpipe_context(pipe); diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c index ea0f833cea..ca2c7c49d7 100644 --- a/src/gallium/drivers/svga/svga_pipe_constants.c +++ b/src/gallium/drivers/svga/svga_pipe_constants.c @@ -49,7 +49,7 @@ struct svga_constbuf static void svga_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_buffer *buf) + struct pipe_buffer *buf) { struct svga_context *svga = svga_context(pipe); diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 11044d6845..8008b596c3 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -825,7 +825,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe, static INLINE void trace_context_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, - const struct pipe_buffer *buffer) + struct pipe_buffer *buffer) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 509d8e534b..9d19ec2f7f 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -156,7 +156,7 @@ struct pipe_context { void (*set_constant_buffer)( struct pipe_context *, uint shader, uint index, - const struct pipe_buffer *buf ); + struct pipe_buffer *buf ); void (*set_framebuffer_state)( struct pipe_context *, const struct pipe_framebuffer_state * ); -- cgit v1.2.3 From 22d615a9c4b95c528d6604eccb71cf6fe5193d5a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 11 Jan 2010 20:27:54 -0800 Subject: r300g: Avoid segfault on binding null RS state. --- src/gallium/drivers/r300/r300_state.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 78764ddc98..4c22d4ebc4 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -717,7 +717,11 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + if (rs) { + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + } else { + r300->tcl_bypass = FALSE; + } r300->rs_state.state = rs; r300->rs_state.dirty = TRUE; -- cgit v1.2.3 From e920ee23b32f6031a7b8527b540566e7ada6af8a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 11 Jan 2010 20:41:57 -0800 Subject: r300g: Placate kernel checker by explicitly disabling depth test. This probably should disable stencil, too, if the kernel cares enough. Note: When atomized, framebuffer setup should go towards the end anyway, but it *must* follow these test setups anyway. --- src/gallium/drivers/r300/r300_emit.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 0e5533c790..2bf8fbd684 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -122,15 +122,8 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) }*/ OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - - if (r300->framebuffer_state.zsbuf) { - OUT_CS(dsa->z_buffer_control); - OUT_CS(dsa->z_stencil_control); - } else { - OUT_CS(0); - OUT_CS(0); - } - + OUT_CS(dsa->z_buffer_control); + OUT_CS(dsa->z_stencil_control); OUT_CS(dsa->stencil_ref_mask); /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ @@ -395,7 +388,7 @@ void r300_emit_fb_state(struct r300_context* r300, assert(fb->nr_cbufs <= 4); BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) + - (fb->zsbuf ? 10 : 0) + 6); + (fb->zsbuf ? 10 : 3) + 6); /* Flush and free renderbuffer caches. */ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, @@ -431,7 +424,7 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); } - /* Set up a zbuffer. */ + /* Set up the Z/stencil buffer, or disable it. */ if (fb->zsbuf) { surf = fb->zsbuf; tex = (struct r300_texture*)surf->texture; @@ -445,6 +438,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, RADEON_GEM_DOMAIN_VRAM, 0); + } else { + OUT_CS_REG_SEQ(R300_ZB_CNTL, 2); + OUT_CS(0x0); + OUT_CS(0x0); } END_CS; -- cgit v1.2.3 From daa446fa8312337bad302f0ec4928eca49db9f32 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 11 Jan 2010 20:52:19 -0800 Subject: r300g: Add back dirty state check. From 07ea7e6c80. This is the only questionable part of that commit, AFAICT... --- src/gallium/drivers/r300/r300_emit.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 2bf8fbd684..5ac6ed866f 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1000,6 +1000,10 @@ void r300_emit_dirty_state(struct r300_context* r300) r300->context.flush(&r300->context, 0, NULL); } + if (!(r300->dirty_state)) { + return; + } + /* Clean out BOs. */ r300->winsys->reset_bos(r300->winsys); -- cgit v1.2.3 From ce1c493ff8fad4b62e2b66f06636ac6560a6e0ad Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 12 Jan 2010 02:09:07 -0800 Subject: r300g: Always emit scissors. Save some code, improve FPS, and fix piglit tests. Everybody wins. --- src/gallium/drivers/r300/r300_context.c | 4 ++- src/gallium/drivers/r300/r300_context.h | 11 +------- src/gallium/drivers/r300/r300_emit.c | 50 +++++++++++++++++++++++---------- src/gallium/drivers/r300/r300_state.c | 43 ++-------------------------- src/gallium/drivers/r300/r300_texture.c | 6 ++-- 5 files changed, 45 insertions(+), 69 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index af95bbe789..be83c3eca8 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -127,6 +127,8 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(rs); R300_INIT_ATOM(scissor); R300_INIT_ATOM(viewport); + + r300->scissor_state.always_dirty = TRUE; } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -182,7 +184,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state.state = CALLOC_STRUCT(r300_scissor_state); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 5937f0e2cc..34a759e494 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -40,6 +40,7 @@ struct r300_atom { void* state; void (*emit)(struct r300_context*, void*); boolean dirty; + boolean always_dirty; }; struct r300_blend_state { @@ -106,16 +107,6 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_scissor_regs { - uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ -}; - -struct r300_scissor_state { - struct r300_scissor_regs framebuffer; - struct r300_scissor_regs scissor; -}; - struct r300_texture_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 5ac6ed866f..2f5df6a00a 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -638,27 +638,47 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -static void r300_emit_scissor_regs(struct r300_context* r300, - struct r300_scissor_regs* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { + unsigned minx, miny, maxx, maxy; + uint32_t top_left, bottom_right; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; CS_LOCALS(r300); - BEGIN_CS(3); - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->top_left); - OUT_CS(scissor->bottom_right); - END_CS; -} + minx = miny = 0; + maxx = r300->framebuffer_state.width; + maxy = r300->framebuffer_state.height; -void r300_emit_scissor_state(struct r300_context* r300, void* state) -{ - struct r300_scissor_state* scissor = (struct r300_scissor_state*)state; - /* XXX argfl! */ if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { - r300_emit_scissor_regs(r300, &scissor->scissor); + minx = MAX2(minx, scissor->minx); + miny = MAX2(miny, scissor->miny); + maxx = MIN2(maxx, scissor->maxx); + maxy = MIN2(maxy, scissor->maxy); + } + + if (r300screen->caps->is_r500) { + top_left = + (minx << R300_SCISSORS_X_SHIFT) | + (miny << R300_SCISSORS_Y_SHIFT); + bottom_right = + ((maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((maxy - 1) << R300_SCISSORS_Y_SHIFT); } else { - r300_emit_scissor_regs(r300, &scissor->framebuffer); + /* Offset of 1440 in non-R500 chipsets. */ + top_left = + ((minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((miny + 1440) << R300_SCISSORS_Y_SHIFT); + bottom_right = + (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } + + BEGIN_CS(3); + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(top_left); + OUT_CS(bottom_right); + END_CS; } void r300_emit_texture(struct r300_context* r300, @@ -1072,7 +1092,7 @@ validate: } foreach(atom, &r300->atom_list) { - if (atom->dirty) { + if (atom->dirty || atom->always_dirty) { atom->emit(r300, atom->state); atom->dirty = FALSE; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 4c22d4ebc4..a08aa3493d 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -474,36 +474,11 @@ static void r300_delete_dsa_state(struct pipe_context* pipe, FREE(state); } -static void r300_set_scissor_regs(const struct pipe_scissor_state* state, - struct r300_scissor_regs *scissor, - boolean is_r500) -{ - if (is_r500) { - scissor->top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - scissor->top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } -} - static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_scissor_state* scissor = - (struct r300_scissor_state*)r300->scissor_state.state; - struct pipe_scissor_state pscissor; if (r300->draw) { draw_flush(r300->draw); @@ -511,19 +486,11 @@ static void r300->framebuffer_state = *state; - /* XXX Arg. This is silly. */ - pscissor.minx = pscissor.miny = 0; - pscissor.maxx = state->width; - pscissor.maxy = state->height; - r300_set_scissor_regs(&pscissor, &scissor->framebuffer, - r300_screen(r300->context.screen)->caps->is_r500); - /* Don't rely on the order of states being set for the first time. */ r300->dirty_state |= R300_NEW_FRAMEBUFFERS; r300->blend_state.dirty = TRUE; r300->dsa_state.dirty = TRUE; - r300->scissor_state.dirty = TRUE; } /* Create fragment shader state. */ @@ -725,8 +692,6 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->rs_state.state = rs; r300->rs_state.dirty = TRUE; - /* XXX Why is this still needed, dammit!? */ - r300->scissor_state.dirty = TRUE; r300->viewport_state.dirty = TRUE; /* XXX Clean these up when we move to atom emits */ @@ -868,13 +833,9 @@ static void r300_set_scissor_state(struct pipe_context* pipe, const struct pipe_scissor_state* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_scissor_state* scissor = - (struct r300_scissor_state*)r300->scissor_state.state; - - r300_set_scissor_regs(state, &scissor->scissor, - r300_screen(r300->context.screen)->caps->is_r500); - r300->scissor_state.dirty = TRUE; + memcpy(r300->scissor_state.state, state, + sizeof(struct pipe_scissor_state)); } static void r300_set_viewport_state(struct pipe_context* pipe, diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 9a96206a4d..2784916ddf 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -115,6 +115,8 @@ static void r300_setup_miptree(struct r300_texture* tex) int stride, size, layer_size; int i; + debug_printf("r300: Making miptree for texture, format %s\n", pf_name(base->format)); + for (i = 0; i <= base->last_level; i++) { unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); @@ -132,9 +134,9 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->pitch[i] = stride / util_format_get_blocksize(base->format); debug_printf("r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes)\n", + "(%dx%dx%d px, pitch %d bytes) %d bytes total\n", i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride); + u_minify(base->depth0, i), stride, tex->size); } } -- cgit v1.2.3 From 86bfe974b880dc2cbf40b91ba0fde34e8a9c756e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 10 Jan 2010 12:58:11 +0000 Subject: gallium: Generalize the alignment macros to other compilers and any alignment. --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 6 ++--- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 2 +- src/gallium/drivers/cell/common.h | 3 ++- src/gallium/drivers/cell/ppu/cell_context.h | 8 +++---- src/gallium/drivers/cell/spu/spu_command.c | 5 ++-- src/gallium/drivers/cell/spu/spu_exec.c | 6 +++-- src/gallium/drivers/cell/spu/spu_exec.h | 4 ++-- src/gallium/drivers/cell/spu/spu_funcs.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 22 ++++++++++-------- src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 4 ++-- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 15 ++++++------ src/gallium/drivers/llvmpipe/lp_quad.h | 9 ++++---- src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- src/gallium/drivers/llvmpipe/lp_test_blend.c | 20 ++++++++-------- src/gallium/drivers/llvmpipe/lp_test_conv.c | 4 ++-- src/gallium/include/pipe/p_compiler.h | 29 ++++++++++++++++-------- 17 files changed, 81 insertions(+), 62 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index ad184bd696..6179b5b65a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -98,9 +98,9 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* loop over verts */ for (i = 0; i < count; i += MAX_VERTICES) { const uint max_vertices = MIN2(MAX_VERTICES, count - i); - float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; - float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; - float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4]); + PIPE_ALIGN_VAR(16, float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4]); + PIPE_ALIGN_VAR(16, float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4]); uint attr; /* convert (up to) four input verts to SoA format */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 138d2d095b..cec5b11fd3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -51,7 +51,7 @@ * Since it's pretty much impossible to form PPC vector immediates, load * them from memory here: */ -const float ppc_builtin_constants[] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16, const float ppc_builtin_constants[]) = { 1.0f, -128.0f, 128.0, 0.0 }; diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d5f5c7bbba..aa29dcb394 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -358,6 +358,7 @@ struct cell_spu_function_info /** This is the object passed to spe_create_thread() */ +PIPE_ALIGN_TYPE(16, struct cell_init_info { unsigned id; @@ -370,7 +371,7 @@ struct cell_init_info uint *buffer_status; /**< points at cell_context->buffer_status */ struct cell_spu_function_info *spu_functions; -} ALIGN16_ATTRIB; +}); #endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 5c3188e7f9..fa6e4f65cd 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -89,7 +89,7 @@ struct cell_buffer_node; */ struct cell_buffer_list { - struct cell_fence fence ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, struct cell_fence fence); struct cell_buffer_node *head; }; @@ -150,18 +150,18 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; - struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, struct cell_spu_function_info spu_functions); uint num_cells, num_spus; /** Buffers for command batches, vertex/index data */ uint buffer_size[CELL_NUM_BUFFERS]; - ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]); int cur_batch; /**< which buffer is being filled w/ commands */ /** [4] to ensure 16-byte alignment for each status word */ - uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]); /** Associated with each command/batch buffer is a list of pipe_buffers diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 12b855a3db..2a62db4b79 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -53,8 +53,7 @@ struct spu_vs_context draw; /** * Buffers containing dynamically generated SPU code: */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16, static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]); @@ -543,7 +542,7 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, qword buffer[CELL_BUFFER_SIZE / 16]); const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); uint pos; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index d86d8e09a5..6db8ed419b 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { + PIPE_ALIGN_VAR(16, union { struct tgsi_full_declaration decl; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d ALIGN16_ATTRIB; + } d); unsigned ea = (unsigned) (mach->Declarations + pc); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); @@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { + PIPE_ALIGN_VAR(16, union { struct tgsi_full_instruction inst; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i ALIGN16_ATTRIB; + } i); unsigned ea = (unsigned) (mach->Instructions + pc); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 8605679940..c8c6183e2e 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -98,9 +98,9 @@ struct spu_exec_machine * 4 internal temporaries * 1 address */ + PIPE_ALIGN_VAR(16, struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] - ALIGN16_ATTRIB; + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]); struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index ff3d609d25..a4e560b0a5 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions, void return_function_info(void) { - struct cell_spu_function_info funcs ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, struct cell_spu_function_info funcs); int tag = TAG_MISC; ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 33767e7c51..8500f1bb87 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -93,6 +93,7 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, vector float *constants); +PIPE_ALIGN_TYPE(16, struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ @@ -107,10 +108,11 @@ struct spu_framebuffer uint zsize; /**< 0, 2 or 4 bytes per Z */ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -} ALIGN16_ATTRIB; +}); /** per-texture level info */ +PIPE_ALIGN_TYPE(16, struct spu_texture_level { void *start; @@ -123,20 +125,22 @@ struct spu_texture_level vector signed int mask_s, mask_t, mask_r; /** texcoord clamp limits */ vector signed int max_s, max_t, max_r; -} ALIGN16_ATTRIB; +}); +PIPE_ALIGN_TYPE(16, struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; uint target; /**< PIPE_TEXTURE_x */ -} ALIGN16_ATTRIB; +}); /** * All SPU global/context state will be in a singleton object of this type: */ +PIPE_ALIGN_TYPE(16, struct spu_global { /** One-time init/constant info */ @@ -155,8 +159,8 @@ struct spu_global struct vertex_info vertex_info; /** Current color and Z tiles */ - tile_t ctile ALIGN16_ATTRIB; - tile_t ztile ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, tile_t ctile); + PIPE_ALIGN_VAR(16, tile_t ztile); /** Read depth/stencil tiles? */ boolean read_depth_stencil; @@ -165,8 +169,8 @@ struct spu_global ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]); + PIPE_ALIGN_VAR(16, ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]); /** Current fragment ops machine code, at 8-byte boundary */ uint *fragment_ops_code; @@ -175,7 +179,7 @@ struct spu_global spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + PIPE_ALIGN_VAR(8, uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]); /** Current fragment ops function */ spu_fragment_program_func fragment_program; @@ -187,7 +191,7 @@ struct spu_global /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; -} ALIGN16_ATTRIB; +}); extern struct spu_global spu; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 5ffb7073ab..b13fe3184f 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -169,7 +169,7 @@ void cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, ubyte vertex_data[CELL_BUFFER_SIZE]); const uint vertex_size = render->vertex_size; /* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; uint index_bytes; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 03375d84a5..43600dfe0b 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -43,7 +43,7 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16, static const qword fetch_shuffle_data[5]) = { /* Shuffle used by CVT_64_FLOAT */ { @@ -110,7 +110,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, qword in[2 * 4]); /* Fetch four attributes for four vertices. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index fbe5b34d39..49938a8001 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + PIPE_ALIGN_VAR(16, struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]); + PIPE_ALIGN_VAR(16, struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw, ASSERT_ALIGN16(draw->constants); machine->Consts = (float (*)[4]) draw->constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + machine->Inputs = inputs; + machine->Outputs = outputs; spu_vertex_fetch( draw, machine, elts, count ); @@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw, for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; + PIPE_ALIGN_VAR(16, unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + + MAX_VERTEX_SIZE]); struct vertex_header *const tmpOut = (struct vertex_header *) buffer; const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) @@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw, } -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16, +unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]); void diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h index 7eb05de77a..eb285e355e 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ b/src/gallium/drivers/llvmpipe/lp_quad.h @@ -31,6 +31,7 @@ #ifndef LP_QUAD_H #define LP_QUAD_H +#include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "tgsi/tgsi_exec.h" @@ -83,7 +84,7 @@ struct quad_header_inout struct quad_header_output { /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + PIPE_ALIGN_VAR(16, float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]); }; @@ -92,9 +93,9 @@ struct quad_header_output */ struct quad_interp_coef { - float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16, float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]); + PIPE_ALIGN_VAR(16, float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]); + PIPE_ALIGN_VAR(16, float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]); }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b18f17c0cd..29033a067c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, uint8_t *tile; uint8_t *color; void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16, uint32_t mask[4][NUM_CHANNELS]); unsigned chan_index; unsigned q; diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 29fff91981..de8f872e58 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -531,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16, uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]); int64_t start_counter = 0; int64_t end_counter = 0; @@ -596,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16, uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]); int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index faddfb9677..3a6353b916 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -230,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16, uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]); + PIPE_ALIGN_VAR(16, uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]); double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 26a940593f..80610a07b6 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -139,22 +139,33 @@ typedef unsigned char boolean; +/* Macros for data alignment. */ #if defined(__GNUC__) -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) -#define ALIGN16_ASSIGN(NAME) NAME##___aligned -#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) -#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) )) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Type-Attributes.html */ +#define PIPE_ALIGN_TYPE(_alignment, _type) _type __attribute__((aligned(_alignment))) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */ +#define PIPE_ALIGN_VAR(_alignment, _decl) _decl __attribute__((aligned(_alignment))) + #if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64) #define ALIGN_STACK __attribute__((force_align_arg_pointer)) #else #define ALIGN_STACK #endif -#else -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1] -#define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned) -#define ALIGN16_ATTRIB -#define ALIGN8_ATTRIB + +#elif defined(_MSC_VER) + +/* See http://msdn.microsoft.com/en-us/library/83ythb65.aspx */ +#define PIPE_ALIGN_TYPE(_alignment, _type) __declspec(align(_alignment)) _type +#define PIPE_ALIGN_VAR(_alignment, _decl) __declspec(align(_alignment)) _decl + #define ALIGN_STACK + +#else + +#error "Unsupported compiler" + #endif -- cgit v1.2.3 From 5dfd5ed5e7d23d4ee8572669af2673c3a1315763 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 12 Jan 2010 11:47:37 +0000 Subject: gallium: Simplify PIPE_ALIGN_VAR. gcc allows pre-fix variable attributes. Suggested by Ian Romanick. --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 6 +++--- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 3 ++- src/gallium/drivers/cell/ppu/cell_context.h | 8 ++++---- src/gallium/drivers/cell/spu/spu_command.c | 4 ++-- src/gallium/drivers/cell/spu/spu_exec.c | 8 ++++---- src/gallium/drivers/cell/spu/spu_exec.h | 4 ++-- src/gallium/drivers/cell/spu/spu_funcs.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 10 +++++----- src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 5 +++-- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 12 ++++++------ src/gallium/drivers/llvmpipe/lp_quad.h | 8 ++++---- src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- src/gallium/drivers/llvmpipe/lp_test_blend.c | 20 ++++++++++---------- src/gallium/drivers/llvmpipe/lp_test_conv.c | 4 ++-- src/gallium/include/pipe/p_compiler.h | 4 ++-- 16 files changed, 52 insertions(+), 50 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 6179b5b65a..da9f3e3d35 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -98,9 +98,9 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* loop over verts */ for (i = 0; i < count; i += MAX_VERTICES) { const uint max_vertices = MIN2(MAX_VERTICES, count - i); - PIPE_ALIGN_VAR(16, float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4]); - PIPE_ALIGN_VAR(16, float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4]); - PIPE_ALIGN_VAR(16, float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4]); + PIPE_ALIGN_VAR(16) float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4]; + PIPE_ALIGN_VAR(16) float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4]; + PIPE_ALIGN_VAR(16) float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4]; uint attr; /* convert (up to) four input verts to SoA format */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index cec5b11fd3..ad553c71a5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -51,7 +51,8 @@ * Since it's pretty much impossible to form PPC vector immediates, load * them from memory here: */ -PIPE_ALIGN_VAR(16, const float ppc_builtin_constants[]) = { +PIPE_ALIGN_VAR(16) const float +ppc_builtin_constants[] = { 1.0f, -128.0f, 128.0, 0.0 }; diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index fa6e4f65cd..3fb6a3227c 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -89,7 +89,7 @@ struct cell_buffer_node; */ struct cell_buffer_list { - PIPE_ALIGN_VAR(16, struct cell_fence fence); + PIPE_ALIGN_VAR(16) struct cell_fence fence; struct cell_buffer_node *head; }; @@ -150,18 +150,18 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; - PIPE_ALIGN_VAR(16, struct cell_spu_function_info spu_functions); + PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; uint num_cells, num_spus; /** Buffers for command batches, vertex/index data */ uint buffer_size[CELL_NUM_BUFFERS]; - PIPE_ALIGN_VAR(16, ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]); + PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]; int cur_batch; /**< which buffer is being filled w/ commands */ /** [4] to ensure 16-byte alignment for each status word */ - PIPE_ALIGN_VAR(16, uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]); + PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]; /** Associated with each command/batch buffer is a list of pipe_buffers diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 2a62db4b79..55bd85bde2 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -53,7 +53,7 @@ struct spu_vs_context draw; /** * Buffers containing dynamically generated SPU code: */ -PIPE_ALIGN_VAR(16, static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]); +PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; @@ -542,7 +542,7 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - PIPE_ALIGN_VAR(16, qword buffer[CELL_BUFFER_SIZE / 16]); + PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); uint pos; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 6db8ed419b..d2166a4901 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1839,11 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { - PIPE_ALIGN_VAR(16, + PIPE_ALIGN_VAR(16) union { struct tgsi_full_declaration decl; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d); + } d; unsigned ea = (unsigned) (mach->Declarations + pc); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); @@ -1854,11 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { - PIPE_ALIGN_VAR(16, + PIPE_ALIGN_VAR(16) union { struct tgsi_full_instruction inst; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i); + } i; unsigned ea = (unsigned) (mach->Instructions + pc); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index c8c6183e2e..0ca92af248 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -98,9 +98,9 @@ struct spu_exec_machine * 4 internal temporaries * 1 address */ - PIPE_ALIGN_VAR(16, + PIPE_ALIGN_VAR(16) struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]); + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index a4e560b0a5..98919c43ff 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions, void return_function_info(void) { - PIPE_ALIGN_VAR(16, struct cell_spu_function_info funcs); + PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; int tag = TAG_MISC; ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 8500f1bb87..b18f4c22ef 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -159,8 +159,8 @@ struct spu_global struct vertex_info vertex_info; /** Current color and Z tiles */ - PIPE_ALIGN_VAR(16, tile_t ctile); - PIPE_ALIGN_VAR(16, tile_t ztile); + PIPE_ALIGN_VAR(16) tile_t ctile; + PIPE_ALIGN_VAR(16) tile_t ztile; /** Read depth/stencil tiles? */ boolean read_depth_stencil; @@ -169,8 +169,8 @@ struct spu_global ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - PIPE_ALIGN_VAR(16, ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]); - PIPE_ALIGN_VAR(16, ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]); + PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; + PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; /** Current fragment ops machine code, at 8-byte boundary */ uint *fragment_ops_code; @@ -179,7 +179,7 @@ struct spu_global spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ - PIPE_ALIGN_VAR(8, uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]); + PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; /** Current fragment ops function */ spu_fragment_program_func fragment_program; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index b13fe3184f..14987e3c3a 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -169,7 +169,7 @@ void cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ - PIPE_ALIGN_VAR(16, ubyte vertex_data[CELL_BUFFER_SIZE]); + PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE]; const uint vertex_size = render->vertex_size; /* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; uint index_bytes; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 43600dfe0b..087963960d 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -43,7 +43,8 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -PIPE_ALIGN_VAR(16, static const qword fetch_shuffle_data[5]) = { +PIPE_ALIGN_VAR(16) static const qword +fetch_shuffle_data[5] = { /* Shuffle used by CVT_64_FLOAT */ { @@ -110,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - PIPE_ALIGN_VAR(16, qword in[2 * 4]); + PIPE_ALIGN_VAR(16) qword in[2 * 4]; /* Fetch four attributes for four vertices. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index 49938a8001..3e9804bf8e 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - PIPE_ALIGN_VAR(16, struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]); - PIPE_ALIGN_VAR(16, struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]); + PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; + PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -132,9 +132,9 @@ run_vertex_program(struct spu_vs_context *draw, for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; - PIPE_ALIGN_VAR(16, + PIPE_ALIGN_VAR(16) unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE]); + + MAX_VERTEX_SIZE]; struct vertex_header *const tmpOut = (struct vertex_header *) buffer; const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) @@ -187,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw, } -PIPE_ALIGN_VAR(16, -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]); +PIPE_ALIGN_VAR(16) unsigned char +immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]); void diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h index eb285e355e..c3a48700a4 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ b/src/gallium/drivers/llvmpipe/lp_quad.h @@ -84,7 +84,7 @@ struct quad_header_inout struct quad_header_output { /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - PIPE_ALIGN_VAR(16, float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]); + PIPE_ALIGN_VAR(16) float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; }; @@ -93,9 +93,9 @@ struct quad_header_output */ struct quad_interp_coef { - PIPE_ALIGN_VAR(16, float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]); - PIPE_ALIGN_VAR(16, float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]); - PIPE_ALIGN_VAR(16, float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]); + PIPE_ALIGN_VAR(16) float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 29033a067c..9b96bba727 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, uint8_t *tile; uint8_t *color; void *depth; - PIPE_ALIGN_VAR(16, uint32_t mask[4][NUM_CHANNELS]); + PIPE_ALIGN_VAR(16) uint32_t mask[4][NUM_CHANNELS]; unsigned chan_index; unsigned q; diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index de8f872e58..7458ef7544 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -531,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - PIPE_ALIGN_VAR(16, uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]); - PIPE_ALIGN_VAR(16, uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]); - PIPE_ALIGN_VAR(16, uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]); - PIPE_ALIGN_VAR(16, uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]); - PIPE_ALIGN_VAR(16, uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -596,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - PIPE_ALIGN_VAR(16, uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]); - PIPE_ALIGN_VAR(16, uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]); - PIPE_ALIGN_VAR(16, uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]); - PIPE_ALIGN_VAR(16, uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]); - PIPE_ALIGN_VAR(16, uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 3a6353b916..3b48eac382 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -230,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - PIPE_ALIGN_VAR(16, uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]); - PIPE_ALIGN_VAR(16, uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]); + PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 80610a07b6..5f15e8203d 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -146,7 +146,7 @@ typedef unsigned char boolean; #define PIPE_ALIGN_TYPE(_alignment, _type) _type __attribute__((aligned(_alignment))) /* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */ -#define PIPE_ALIGN_VAR(_alignment, _decl) _decl __attribute__((aligned(_alignment))) +#define PIPE_ALIGN_VAR(_alignment) __attribute__((aligned(_alignment))) #if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64) #define ALIGN_STACK __attribute__((force_align_arg_pointer)) @@ -158,7 +158,7 @@ typedef unsigned char boolean; /* See http://msdn.microsoft.com/en-us/library/83ythb65.aspx */ #define PIPE_ALIGN_TYPE(_alignment, _type) __declspec(align(_alignment)) _type -#define PIPE_ALIGN_VAR(_alignment, _decl) __declspec(align(_alignment)) _decl +#define PIPE_ALIGN_VAR(_alignment) __declspec(align(_alignment)) #define ALIGN_STACK -- cgit v1.2.3 From 26c78a4968a3c10ca006699d240150e6aa4b4250 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 12 Jan 2010 12:15:24 +0000 Subject: gallium: Rename ALIGN_STACK -> PIPE_ALIGN_STACK for consistency. --- src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- src/gallium/drivers/llvmpipe/lp_test_blend.c | 2 +- src/gallium/drivers/llvmpipe/lp_test_conv.c | 2 +- src/gallium/drivers/llvmpipe/lp_test_format.c | 2 +- src/gallium/include/pipe/p_compiler.h | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 9b96bba727..0b2d3a2801 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -117,7 +117,7 @@ struct setup_context { /** * Execute fragment shader for the four fragments in the quad. */ -ALIGN_STACK +PIPE_ALIGN_STACK static void shade_quads(struct llvmpipe_context *llvmpipe, struct quad_header *quads[], diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 7458ef7544..6c29e8d8ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -462,7 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 3b48eac382..c1abee424c 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,7 +142,7 @@ add_conv_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 23ea9ebbe7..2b258f1052 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -199,7 +199,7 @@ add_store_rgba_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 5f15e8203d..18ebd0c948 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -149,9 +149,9 @@ typedef unsigned char boolean; #define PIPE_ALIGN_VAR(_alignment) __attribute__((aligned(_alignment))) #if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64) -#define ALIGN_STACK __attribute__((force_align_arg_pointer)) +#define PIPE_ALIGN_STACK __attribute__((force_align_arg_pointer)) #else -#define ALIGN_STACK +#define PIPE_ALIGN_STACK #endif #elif defined(_MSC_VER) @@ -160,7 +160,7 @@ typedef unsigned char boolean; #define PIPE_ALIGN_TYPE(_alignment, _type) __declspec(align(_alignment)) _type #define PIPE_ALIGN_VAR(_alignment) __declspec(align(_alignment)) -#define ALIGN_STACK +#define PIPE_ALIGN_STACK #else -- cgit v1.2.3 From c83093973e7e3ffb494a9d6ce265180664497437 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 12 Jan 2010 17:38:52 +0100 Subject: nv50: fix memory leak on nv50_pc free --- src/gallium/drivers/nv50/nv50_program.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index e16fa479e5..069f815938 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -3622,6 +3622,8 @@ free_nv50_pc(struct nv50_pc *pc) FREE(pc->attr); if (pc->temp) FREE(pc->temp); + if (pc->insn_pos) + FREE(pc->insn_pos); FREE(pc); } -- cgit v1.2.3 From 90762bd3550338368d989c0b212c10b6e33b023b Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 12 Jan 2010 17:39:23 +0100 Subject: nv50: fix 2 off by one memory leaks (nv50_miptree_level->image_offset) --- src/gallium/drivers/nv50/nv50_miptree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 3f1edf0a13..cecb1efc90 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -145,7 +145,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); FREE(mt); return NULL; @@ -188,7 +188,7 @@ nv50_miptree_destroy(struct pipe_texture *pt) struct nv50_miptree *mt = nv50_miptree(pt); unsigned l; - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); nouveau_bo_ref(NULL, &mt->base.bo); -- cgit v1.2.3 From 8c53a2576ec998936981c354b02979f803c0e4de Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 12 Jan 2010 18:51:27 +0100 Subject: gallium: draw_arrays/elements_instanced() are of type void. --- src/gallium/drivers/softpipe/sp_draw_arrays.c | 44 +++++++++++++-------------- src/gallium/drivers/softpipe/sp_state.h | 4 +-- src/gallium/include/pipe/p_context.h | 30 +++++++++--------- 3 files changed, 39 insertions(+), 39 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 87312ae151..b3ece9d8ed 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -172,7 +172,7 @@ softpipe_draw_elements(struct pipe_context *pipe, 1); } -boolean +void softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned mode, unsigned start, @@ -180,19 +180,19 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount) { - return softpipe_draw_range_elements_instanced(pipe, - NULL, - 0, - 0, - 0xffffffff, - mode, - start, - count, - startInstance, - instanceCount); + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); } -boolean +void softpipe_draw_elements_instanced(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -202,16 +202,16 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount) { - return softpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, - 0, - 0xffffffff, - mode, - start, - count, - startInstance, - instanceCount); + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); } static boolean diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index f8886565e9..3153d6e6a4 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -199,7 +199,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count); -boolean +void softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned mode, unsigned start, @@ -207,7 +207,7 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount); -boolean +void softpipe_draw_elements_instanced(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 6394e095d3..f2242d2069 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -69,21 +69,21 @@ struct pipe_context { unsigned indexSize, unsigned mode, unsigned start, unsigned count); - boolean (*draw_arrays_instanced)(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - - boolean (*draw_elements_instanced)(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); + void (*draw_arrays_instanced)(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + void (*draw_elements_instanced)(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); /* XXX: this is (probably) a temporary entrypoint, as the range * information should be available from the vertex_buffer state. -- cgit v1.2.3 From 66334847744f2547a6891cbcf2191306a72f04ab Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 12 Jan 2010 21:50:10 +0100 Subject: r300g: mark all states as dirty after flush It fixes almost all regressions introduced lately. --- src/gallium/drivers/r300/r300_flush.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 14a08241fc..c78a7673a3 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -37,6 +37,7 @@ static void r300_flush(struct pipe_context* pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_query *query; + struct r300_atom *atom; CS_LOCALS(r300); /* We probably need to flush Draw, but we may have been called from @@ -54,7 +55,15 @@ static void r300_flush(struct pipe_context* pipe, r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; + + /* New kitchen sink, baby. */ + foreach(atom, &r300->atom_list) { + if (atom->state) { + atom->dirty = TRUE; + } + } } + /* reset flushed query */ foreach(query, &r300->query_list) { query->flushed = TRUE; -- cgit v1.2.3 From a2926a2a8bbd72b6b78adf88478aa02c9624b289 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 12 Jan 2010 23:23:05 +0100 Subject: Revert "r300g: Placate kernel checker by explicitly disabling depth test." This reverts commit e920ee23b32f6031a7b8527b540566e7ada6af8a. Assuming the FB state is not dirty and the DSA state is, then the depth test will be re-enabled, making the kernel checker angry. Reverting the commit fixes piglit/glsl-bug-22603. --- src/gallium/drivers/r300/r300_emit.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 2f5df6a00a..559651de64 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -122,8 +122,15 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) }*/ OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - OUT_CS(dsa->z_buffer_control); - OUT_CS(dsa->z_stencil_control); + + if (r300->framebuffer_state.zsbuf) { + OUT_CS(dsa->z_buffer_control); + OUT_CS(dsa->z_stencil_control); + } else { + OUT_CS(0); + OUT_CS(0); + } + OUT_CS(dsa->stencil_ref_mask); /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ @@ -388,7 +395,7 @@ void r300_emit_fb_state(struct r300_context* r300, assert(fb->nr_cbufs <= 4); BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) + - (fb->zsbuf ? 10 : 3) + 6); + (fb->zsbuf ? 10 : 0) + 6); /* Flush and free renderbuffer caches. */ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, @@ -424,7 +431,7 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); } - /* Set up the Z/stencil buffer, or disable it. */ + /* Set up a zbuffer. */ if (fb->zsbuf) { surf = fb->zsbuf; tex = (struct r300_texture*)surf->texture; @@ -438,10 +445,6 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, RADEON_GEM_DOMAIN_VRAM, 0); - } else { - OUT_CS_REG_SEQ(R300_ZB_CNTL, 2); - OUT_CS(0x0); - OUT_CS(0x0); } END_CS; -- cgit v1.2.3 From f3c4f2ef623942d65d1e8c38110bf059e286b36a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 12 Jan 2010 23:55:37 +0100 Subject: r300g: only emit scissor when needed Reverting some bits from ce1c493ff8fad4b62e2b66f06636ac6560a6e0ad. Given the latest fixes, it's not needed to always emit scissor, really. --- src/gallium/drivers/r300/r300_context.c | 2 -- src/gallium/drivers/r300/r300_state.c | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index be83c3eca8..98a5bb8e5f 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -127,8 +127,6 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(rs); R300_INIT_ATOM(scissor); R300_INIT_ATOM(viewport); - - r300->scissor_state.always_dirty = TRUE; } struct pipe_context* r300_create_context(struct pipe_screen* screen, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index a08aa3493d..00f1b23117 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -491,6 +491,7 @@ static void r300->blend_state.dirty = TRUE; r300->dsa_state.dirty = TRUE; + r300->scissor_state.dirty = TRUE; } /* Create fragment shader state. */ @@ -692,6 +693,8 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->rs_state.state = rs; r300->rs_state.dirty = TRUE; + /* XXX Why is this still needed, dammit!? */ + r300->scissor_state.dirty = TRUE; r300->viewport_state.dirty = TRUE; /* XXX Clean these up when we move to atom emits */ @@ -836,6 +839,7 @@ static void r300_set_scissor_state(struct pipe_context* pipe, memcpy(r300->scissor_state.state, state, sizeof(struct pipe_scissor_state)); + r300->scissor_state.dirty = TRUE; } static void r300_set_viewport_state(struct pipe_context* pipe, -- cgit v1.2.3 From 4c9a8a14e56b6c45c74f2a27bfa8400b89488ee4 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 9 Jan 2010 19:23:21 +0100 Subject: r300g: silence a warning --- src/gallium/drivers/r300/r300_cs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d142fee050..151f72b0fe 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -52,7 +52,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ - int cs_count = 0; + int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ assert(cs_winsys->check_cs(cs_winsys, (size))) -- cgit v1.2.3 From e279d182323f27aea2c5bf6074c5f2724f7cd812 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 8 Jan 2010 14:08:02 +0100 Subject: r300g: add is_r400 flag r4xx has some additional fragment shader registers compared to r3xx. --- src/gallium/drivers/r300/r300_chipset.c | 14 ++++++++++++++ src/gallium/drivers/r300/r300_chipset.h | 9 ++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 51fdb82ff3..2ad968427d 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) /* Reasonable defaults */ caps->num_vert_fpus = 4; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4A54: caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5548: @@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D57: caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x554C: @@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D4A: caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5D4C: @@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D52: caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x4B48: @@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4B4C: caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5E4C: @@ -182,36 +188,42 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5E4D: caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5954: case 0x5955: caps->family = CHIP_FAMILY_RS480; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x5974: case 0x5975: caps->family = CHIP_FAMILY_RS482; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x5A41: case 0x5A42: caps->family = CHIP_FAMILY_RS400; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x5A61: case 0x5A62: caps->family = CHIP_FAMILY_RC410; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x791E: case 0x791F: caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x793F: @@ -219,6 +231,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x7942: caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x796C: @@ -227,6 +240,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x796F: caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0633a8b8a7..2808486492 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -40,11 +40,18 @@ struct r300_capabilities { unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Whether or not this is R400. The differences compared to their R3xx + * cousins are: + * - Extended fragment shader registers + * - Blend LTE/GTE thresholds */ + boolean is_r400; /* Whether or not this is an RV515 or newer; R500s have many differences * that require extra consideration, compared to their R3xx cousins: * - Extra bit of width and height on texture sizes * - Blend color is split across two registers - * - Universal Shader (US) block used for fragment shaders */ + * - Blend LTE/GTE thresholds + * - Universal Shader (US) block used for fragment shaders + * - FP16 blending and multisampling */ boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; -- cgit v1.2.3 From 99ea4c0c7e75ffe8ee960fe567f57f6d8532ab55 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 10 Jan 2010 02:29:02 +0100 Subject: r300g: add emission of texture tiling bits --- src/gallium/drivers/r300/r300_context.h | 9 +++++++++ src/gallium/drivers/r300/r300_emit.c | 18 ++++++++++++------ src/gallium/drivers/r300/r300_reg.h | 16 +++++++++++----- 3 files changed, 32 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 34a759e494..7e5de40ded 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -179,6 +179,12 @@ struct r300_query { struct r300_query* next; }; +enum r300_buffer_tiling { + R300_BUFFER_LINEAR = 0, + R300_BUFFER_TILED, + R300_BUFFER_SQUARETILED +}; + struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -215,6 +221,9 @@ struct r300_texture { /* Registers carrying texture format data. */ struct r300_texture_state state; + + /* Buffer tiling */ + enum r300_buffer_tiling microtile, macrotile; }; struct r300_vertex_info { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 559651de64..1092ec42c8 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -419,8 +419,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | - r300_translate_colorformat(tex->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); + r300_translate_colorformat(tex->tex.format) | + R300_COLOR_TILE(tex->macrotile) | + R300_COLOR_MICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(surf->format)); @@ -443,8 +445,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, - RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | + R300_DEPTHMACROTILE(tex->macrotile) | + R300_DEPTHMICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); } END_CS; @@ -717,8 +721,10 @@ void r300_emit_texture(struct r300_context* r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_RELOC(tex->buffer, + R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile), + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); END_CS; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 034bfc15cf..361813891f 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_TX_OFFSET_5 0x4554 #define R300_TX_OFFSET_6 0x4558 #define R300_TX_OFFSET_7 0x455C - /* BEGIN: Guess from R200 */ + # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) -# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MACRO_TILE_LINEAR (0 << 2) +# define R300_TXO_MACRO_TILE_TILED (1 << 2) +# define R300_TXO_MACRO_TILE(x) ((x) << 2) # define R300_TXO_MICRO_TILE_LINEAR (0 << 3) -# define R300_TXO_MICRO_TILE (1 << 3) -# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE_TILED (1 << 3) +# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE(x) ((x) << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 - /* END: Guess from R200 */ /* 32 bit chroma key */ #define R300_TX_CHROMA_KEY_0 0x4580 @@ -2283,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_TILE(x) ((x) << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) # define R300_COLOR_MICROTILE_ENABLE (1 << 17) # define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_MICROTILE(x) ((x) << 17) # define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) # define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) @@ -2544,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DEPTHPITCH_MASK 0x00003FFC # define R300_DEPTHMACROTILE_DISABLE (0 << 16) # define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMACROTILE(x) ((x) << 16) # define R300_DEPTHMICROTILE_LINEAR (0 << 17) # define R300_DEPTHMICROTILE_TILED (1 << 17) # define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHMICROTILE(x) ((x) << 17) # define R300_DEPTHENDIAN_NO_SWAP (0 << 18) # define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) # define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) -- cgit v1.2.3 From 46992664d18ce28ce9dbbf43ccc94443e1fdd00c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 10 Jan 2010 07:09:56 +0100 Subject: r300g: update the texture initialization so that it respects tiling This is still work-in-progress and tiling is not enabled by default. --- src/gallium/drivers/r300/r300_texture.c | 56 +++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 2784916ddf..a9bbdd56d8 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -30,6 +30,18 @@ #include "r300_texture.h" #include "r300_screen.h" +#define TILE_WIDTH 0 +#define TILE_HEIGHT 1 + +static const unsigned microblock_table[5][3][2] = { + /*linear tiled square-tiled */ + {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ + {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +}; + static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) { struct r300_texture_state* state = &tex->state; @@ -91,36 +103,68 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } } +/** + * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile + * of the given texture. + */ +static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim) +{ + unsigned pixsize, tile_size; + + pixsize = util_format_get_blocksize(tex->tex.format); + tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] * + (tex->macrotile == R300_BUFFER_TILED ? 8 : 1); + + assert(tile_size); + return tile_size; +} + /** * Return the stride, in bytes, of the texture images of the given texture * at the given level. */ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) { + unsigned tile_width, width; + if (tex->stride_override) return tex->stride_override; + /* Check the level. */ if (level > tex->tex.last_level) { debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level); return 0; } - return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); + tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH); + width = align(u_minify(tex->tex.width0, level), tile_width); + + /* Should already be aligned except for S3TC. */ + return align(util_format_get_stride(tex->tex.format, width), 32); +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, + unsigned level) +{ + unsigned height, tile_height; + + tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT); + height = align(u_minify(tex->tex.height0, level), tile_height); + + return util_format_get_nblocksy(tex->tex.format, height); } static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, layer_size; - int i; + unsigned stride, size, layer_size, nblocksy, i; debug_printf("r300: Making miptree for texture, format %s\n", pf_name(base->format)); for (i = 0; i <= base->last_level; i++) { - unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); - stride = r300_texture_get_stride(tex, i); + nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) @@ -165,7 +209,7 @@ static struct pipe_texture* r300_setup_miptree(tex); r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); - tex->buffer = screen->buffer_create(screen, 1024, + tex->buffer = screen->buffer_create(screen, 2048, PIPE_BUFFER_USAGE_PIXEL, tex->size); -- cgit v1.2.3 From 50ae9c607350be58422132e293b4965b14c55464 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 8 Jan 2010 14:09:57 +0100 Subject: r300g: emit blend LTE/GTE thresholds on >=RV350 As per classic r300. --- src/gallium/drivers/r300/r300_state_invariant.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index f25f3ca217..b0f309695c 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -115,10 +115,12 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - if (caps->is_r500) { + + if (caps->family >= CHIP_FAMILY_RV350) { OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); -- cgit v1.2.3 From 948b7e97a6d82dc61debc741c2970d9decfdf302 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 12 Jan 2010 21:50:14 -0800 Subject: r300g: Switch to immediate mode for tiny renders. Useful for e.g. blitter. --- src/gallium/drivers/r300/r300_render.c | 44 ++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index ee43421cdb..d28931f037 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -114,6 +114,39 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +static void r300_emit_draw_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) +{ + struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer; + unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float); + unsigned i; + uint32_t* map; + CS_LOCALS(r300); + + map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo, + start * vertex_size, count * vertex_size, + PIPE_BUFFER_USAGE_CPU_READ); + + BEGIN_CS(10 + count * vertex_size); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + r300_translate_primitive(mode)); + for (i = 0; i < count * vertex_size; i++) { + OUT_CS(*map); + map++; + } + END_CS; + + pipe_buffer_unmap(r300->context.screen, vbo); +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -264,7 +297,7 @@ void r300_draw_elements(struct pipe_context* pipe, } void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) + unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); @@ -287,9 +320,12 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_dirty_state(r300); - r300_emit_aos(r300, start); - - r300_emit_draw_arrays(r300, mode, count); + if (count < 10 && r300->vertex_buffer_count == 1) { + r300_emit_draw_immediate(r300, mode, start, count); + } else { + r300_emit_aos(r300, start); + r300_emit_draw_arrays(r300, mode, count); + } } /**************************************************************************** -- cgit v1.2.3 From c1db20280c5c36721801c893dd0615551d68394a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 12 Jan 2010 21:52:00 -0800 Subject: r300g: Fix is_r400 status for rs4xx and rc4xx. As suggested by agd5f. --- src/gallium/drivers/r300/r300_chipset.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 2ad968427d..92de297ef1 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -195,28 +195,24 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5955: caps->family = CHIP_FAMILY_RS480; caps->has_tcl = FALSE; - caps->is_r400 = TRUE; break; case 0x5974: case 0x5975: caps->family = CHIP_FAMILY_RS482; caps->has_tcl = FALSE; - caps->is_r400 = TRUE; break; case 0x5A41: case 0x5A42: caps->family = CHIP_FAMILY_RS400; caps->has_tcl = FALSE; - caps->is_r400 = TRUE; break; case 0x5A61: case 0x5A62: caps->family = CHIP_FAMILY_RC410; caps->has_tcl = FALSE; - caps->is_r400 = TRUE; break; case 0x791E: -- cgit v1.2.3 From e4e5acc833d607bdf5cdd728f8a8c5064ea38838 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 12 Jan 2010 22:22:22 -0800 Subject: r300g: Lower the immd mode threshold. --- src/gallium/drivers/r300/r300_render.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index d28931f037..2edb9c6030 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -138,9 +138,14 @@ static void r300_emit_draw_immediate(struct r300_context *r300, OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); + //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size); for (i = 0; i < count * vertex_size; i++) { - OUT_CS(*map); - map++; + if (i % vertex_size == 0) { + //debug_printf("r300: -- vert --\n"); + } + //debug_printf("r300: 0x%08x\n", *map); + OUT_CS(*map); + map++; } END_CS; @@ -320,7 +325,7 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_dirty_state(r300); - if (count < 10 && r300->vertex_buffer_count == 1) { + if (count <= 4 && r300->vertex_buffer_count == 1) { r300_emit_draw_immediate(r300, mode, start, count); } else { r300_emit_aos(r300, start); -- cgit v1.2.3 From ae4eb251324df7f1b061a11107ddefd9a5d54c01 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 13 Jan 2010 00:57:03 -0800 Subject: r300g: DCE in emit. --- src/gallium/drivers/r300/r300_emit.c | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 1092ec42c8..f1acdfc84d 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -790,32 +790,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) END_CS; } -#if 0 -void r300_emit_draw_packet(struct r300_context* r300) -{ - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " - "vertex size %d\n", r300->vbo, - r300->vertex_info->vinfo.size); - /* Set the pointer to our vertex buffer. The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [offset into BO] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info->vinfo.size | - (r300->vertex_info->vinfo.size << 8)); - OUT_CS(r300->vbo_offset); - OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; -} -#endif - void r300_emit_vertex_format_state(struct r300_context* r300) { int i; -- cgit v1.2.3 From a930dc7ee9f834c46474d9f5cf3a80a4e0237ba4 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 13 Jan 2010 01:07:49 -0800 Subject: r300g: Two dirty state optimizations. --- src/gallium/drivers/r300/r300_emit.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index f1acdfc84d..ba04bd07cc 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1003,10 +1003,6 @@ void r300_emit_dirty_state(struct r300_context* r300) r300->context.flush(&r300->context, 0, NULL); } - if (!(r300->dirty_state)) { - return; - } - /* Clean out BOs. */ r300->winsys->reset_bos(r300->winsys); @@ -1043,10 +1039,12 @@ validate: } } /* ...occlusion query buffer... */ - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; + if (r300->dirty_state & R300_NEW_QUERY) { + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } } /* ...and vertex buffer. */ if (r300->vbo) { -- cgit v1.2.3 From aeb34b248903f98559209db4f095d1096ecb580a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 13 Jan 2010 01:41:31 -0800 Subject: r300g: Account for CS space used per atom. Oh yeah, those atoms are startin' to pay off. The main obstacle now for OA playability is the absurdly low default mouse sensitivity, IMO. Not totally smooth yet, but getting there. --- src/gallium/drivers/r300/r300_context.c | 36 +++++++++++++++++++++------------ src/gallium/drivers/r300/r300_context.h | 9 +++++++++ src/gallium/drivers/r300/r300_emit.c | 18 +++++++++++++---- src/gallium/drivers/r300/r300_state.c | 10 ++++++++- 4 files changed, 55 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 98a5bb8e5f..5e4f6552c3 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -110,23 +110,33 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } -#define R300_INIT_ATOM(name) \ - r300->name##_state.state = NULL; \ - r300->name##_state.emit = r300_emit_##name##_state; \ - r300->name##_state.dirty = FALSE; \ - insert_at_tail(&r300->atom_list, &r300->name##_state); +#define R300_INIT_ATOM(atomname, atomsize) \ + r300->atomname##_state.name = #atomname; \ + r300->atomname##_state.state = NULL; \ + r300->atomname##_state.size = atomsize; \ + r300->atomname##_state.emit = r300_emit_##atomname##_state; \ + r300->atomname##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname##_state); static void r300_setup_atoms(struct r300_context* r300) { + /* Create the actual atom list. + * + * Each atom is examined and emitted in the order it appears here, which + * can affect performance and conformance if not handled with care. + * + * Some atoms never change size, others change every emit. This is just + * an upper bound on each atom, to keep the emission machinery from + * underallocating space. */ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(ztop); - R300_INIT_ATOM(blend); - R300_INIT_ATOM(blend_color); - R300_INIT_ATOM(clip); - R300_INIT_ATOM(dsa); - R300_INIT_ATOM(rs); - R300_INIT_ATOM(scissor); - R300_INIT_ATOM(viewport); + R300_INIT_ATOM(ztop, 2); + R300_INIT_ATOM(blend, 8); + R300_INIT_ATOM(blend_color, 3); + R300_INIT_ATOM(clip, 29); + R300_INIT_ATOM(dsa, 8); + R300_INIT_ATOM(rs, 22); + R300_INIT_ATOM(scissor, 3); + R300_INIT_ATOM(viewport, 9); } struct pipe_context* r300_create_context(struct pipe_screen* screen, diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 7e5de40ded..682b9179c8 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -36,10 +36,19 @@ struct r300_fragment_shader; struct r300_vertex_shader; struct r300_atom { + /* List pointers. */ struct r300_atom *prev, *next; + /* Name, for debugging. */ + const char* name; + /* Opaque state. */ void* state; + /* Emit the state to the context. */ void (*emit)(struct r300_context*, void*); + /* Upper bound on number of dwords to emit. */ + unsigned size; + /* Whether this atom should be emitted. */ boolean dirty; + /* Another dirty flag that is never automatically cleared. */ boolean always_dirty; }; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index ba04bd07cc..9f93327e59 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -41,6 +41,7 @@ void r300_emit_blend_state(struct r300_context* r300, void* state) { struct r300_blend_state* blend = (struct r300_blend_state*)state; CS_LOCALS(r300); + BEGIN_CS(8); OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); @@ -992,14 +993,23 @@ void r300_emit_dirty_state(struct r300_context* r300) struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_texture* tex; struct r300_atom* atom; - int i, dirty_tex = 0; + unsigned i, dwords = 1024; + int dirty_tex = 0; boolean invalid = FALSE; - /* Check size of CS. */ - /* Make sure we have at least 8*1024 spare dwords. */ + /* Check the required number of dwords against the space remaining in the + * current CS object. If we need more, then flush. */ + + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + dwords += atom->size; + } + } + + /* Make sure we have at least 2*1024 spare dwords. */ /* XXX It would be nice to know the number of dwords we really need to * XXX emit. */ - if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { + if (!r300->winsys->check_cs(r300->winsys, dwords)) { r300->context.flush(&r300->context, 0, NULL); } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 00f1b23117..281ff68449 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -340,6 +340,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_color_state* state = (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; @@ -355,6 +356,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); + r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2; r300->blend_color_state.dirty = TRUE; } @@ -365,11 +367,14 @@ static void r300_set_clip_state(struct pipe_context* pipe, if (r300_screen(pipe->screen)->caps->has_tcl) { memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); - r300->clip_state.dirty = TRUE; + r300->clip_state.size = 29; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); + r300->clip_state.size = 2; } + + r300->clip_state.dirty = TRUE; } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -462,8 +467,10 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); r300->dsa_state.state = state; + r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6; r300->dsa_state.dirty = TRUE; } @@ -839,6 +846,7 @@ static void r300_set_scissor_state(struct pipe_context* pipe, memcpy(r300->scissor_state.state, state, sizeof(struct pipe_scissor_state)); + r300->scissor_state.dirty = TRUE; } -- cgit v1.2.3 From ea0cc47a4f769e19ceadb9704669244f5ba53871 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Jan 2010 11:13:00 +0100 Subject: svga: Fix call to translate::run(). --- src/gallium/drivers/svga/svga_state_vs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index c614281858..2313eafc37 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -227,7 +227,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, vbuffer->stride); - translate->run(translate, 0, 1, + translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); pipe_buffer_unmap(svga->pipe.screen, -- cgit v1.2.3 From 1b333453e4998d5db76952aed6caa34d98dfdc7c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 13 Jan 2010 13:40:09 +0000 Subject: svga: Fix compile_vs error code. It could erroneously return PIPE_OK in some circumstances. Make compile_fs code identical. --- src/gallium/drivers/svga/svga_state_fs.c | 4 +++- src/gallium/drivers/svga/svga_state_vs.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index 44bb58c900..4fe9141677 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -76,8 +76,10 @@ static enum pipe_error compile_fs( struct svga_context *svga, } result->id = util_bitmask_add(svga->fs_bm); - if(result->id == UTIL_BITMASK_INVALID_INDEX) + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto fail; + } ret = SVGA3D_DefineShader(svga->swc, result->id, diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index db30f2735f..5e33c127d9 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -70,7 +70,7 @@ static enum pipe_error compile_vs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret = PIPE_OK; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_vertex_program( vs, key ); if (result == NULL) { @@ -79,8 +79,10 @@ static enum pipe_error compile_vs( struct svga_context *svga, } result->id = util_bitmask_add(svga->vs_bm); - if(result->id == UTIL_BITMASK_INVALID_INDEX) + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto fail; + } ret = SVGA3D_DefineShader(svga->swc, result->id, -- cgit v1.2.3 From 9bb2b901916eded282634dfa52a82982fbeac128 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 13 Jan 2010 18:36:54 -0800 Subject: r300g: Disable immediate mode rendering for now. Will re-enable when the bugs are ironed out. --- src/gallium/drivers/r300/r300_render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 2edb9c6030..8b07d31000 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -325,7 +325,7 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_dirty_state(r300); - if (count <= 4 && r300->vertex_buffer_count == 1) { + if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) { r300_emit_draw_immediate(r300, mode, start, count); } else { r300_emit_aos(r300, start); -- cgit v1.2.3 From 264e548d86ff2e03a097c23e792fe457e77e775c Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 13 Jan 2010 20:44:49 -0800 Subject: r300g: Use indices to translate index buffers of ubytes. progs/redbook/aapoly, among others, works fine now. --- src/gallium/drivers/r300/r300_render.c | 65 ++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 8b07d31000..6645688fc5 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,6 +26,8 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" +#include "indices/u_indices.h" + #include "pipe/p_inlines.h" #include "util/u_memory.h" @@ -245,15 +247,55 @@ validate: return TRUE; } +static struct pipe_buffer* r300_translate_elts(struct r300_context* r300, + struct pipe_buffer* elts, + unsigned* size, + unsigned* mode, + unsigned* count) +{ + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* new_elts; + void *in_map, *out_map; + unsigned out_prim, out_index_size, out_nr; + u_translate_func out_translate; + + (void)u_index_translator(~0, *mode, *size, *count, PV_LAST, PV_LAST, + &out_prim, &out_index_size, &out_nr, &out_translate); + + debug_printf("r300: old mode %d, new mode %d\n", *mode, out_prim); + debug_printf("r300: old count %d, new count %d\n", *count, out_nr); + debug_printf("r300: old size %d, new size %d\n", *size, out_index_size); + + new_elts = screen->buffer_create(screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + out_index_size * out_nr); + + in_map = pipe_buffer_map(screen, elts, PIPE_BUFFER_USAGE_CPU_READ); + out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE); + + out_translate(in_map, *count, out_map); + + pipe_buffer_unmap(screen, elts); + pipe_buffer_unmap(screen, new_elts); + + *size = out_index_size; + *mode = out_prim; + *count = out_nr; + + return new_elts; +} + /* This is the fast-path drawing & emission for HW TCL. */ void r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) { struct r300_context* r300 = r300_context(pipe); @@ -274,6 +316,11 @@ void r300_draw_range_elements(struct pipe_context* pipe, return; } + if (indexSize == 1) { + indexBuffer = r300_translate_elts(r300, indexBuffer, + &indexSize, &mode, &count); + } + if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { return; @@ -289,6 +336,10 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); + + if (indexSize == 1) { + pipe->screen->buffer_destroy(indexBuffer); + } } /* Simple helpers for context setup. Should probably be moved to util. */ -- cgit v1.2.3 From 60628c65c902f68c600b3d79c06e928aa3286285 Mon Sep 17 00:00:00 2001 From: Łukasz Krotowski Date: Thu, 14 Jan 2010 10:05:02 +0100 Subject: r300g: Really destroy translated buffer at the end. Note that indexSize is changed by r300_translate_elts(). Also make sure it's destroyed on errors. --- src/gallium/drivers/r300/r300_render.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 6645688fc5..528741ab7c 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -316,6 +316,7 @@ void r300_draw_range_elements(struct pipe_context* pipe, return; } + struct pipe_buffer* orgIndexBuffer = indexBuffer; if (indexSize == 1) { indexBuffer = r300_translate_elts(r300, indexBuffer, &indexSize, &mode, &count); @@ -323,11 +324,11 @@ void r300_draw_range_elements(struct pipe_context* pipe, if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { - return; + goto cleanup; } if (!r300->winsys->validate(r300->winsys)) { - return; + goto cleanup; } r300_emit_dirty_state(r300); @@ -337,7 +338,8 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); - if (indexSize == 1) { +cleanup: + if (indexBuffer != orgIndexBuffer) { pipe->screen->buffer_destroy(indexBuffer); } } -- cgit v1.2.3 From 1bc77e9931a248b74e0ef6b6aa2f4c5b2d1a2ca3 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 14 Jan 2010 08:25:46 -0800 Subject: r300g: Cleanup from last few commits. Strangely enough, the code solidified pretty quickly. --- src/gallium/drivers/r300/r300_render.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 528741ab7c..710d850163 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -262,10 +262,6 @@ static struct pipe_buffer* r300_translate_elts(struct r300_context* r300, (void)u_index_translator(~0, *mode, *size, *count, PV_LAST, PV_LAST, &out_prim, &out_index_size, &out_nr, &out_translate); - debug_printf("r300: old mode %d, new mode %d\n", *mode, out_prim); - debug_printf("r300: old count %d, new count %d\n", *count, out_nr); - debug_printf("r300: old size %d, new size %d\n", *size, out_index_size); - new_elts = screen->buffer_create(screen, 32, PIPE_BUFFER_USAGE_INDEX | PIPE_BUFFER_USAGE_CPU_WRITE | @@ -298,6 +294,7 @@ void r300_draw_range_elements(struct pipe_context* pipe, unsigned count) { struct r300_context* r300 = r300_context(pipe); + struct pipe_buffer* orgIndexBuffer = indexBuffer; if (!u_trim_pipe_prim(mode, &count)) { return; @@ -316,7 +313,6 @@ void r300_draw_range_elements(struct pipe_context* pipe, return; } - struct pipe_buffer* orgIndexBuffer = indexBuffer; if (indexSize == 1) { indexBuffer = r300_translate_elts(r300, indexBuffer, &indexSize, &mode, &count); -- cgit v1.2.3 From 923aab93d98cd5ec7f19fd9a52490113ef57213b Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Fri, 15 Jan 2010 22:29:13 +0100 Subject: nouveau: nv30: Remove useless variables fs, txp. Also do not change txf, as bit 13 is in the texture format Signed-off-by: Patrice Mandin --- src/gallium/drivers/nv30/nv30_fragtex.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 9893567891..0cc3172dcd 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -43,7 +43,6 @@ static struct nv30_texture_format * nv30_fragtex_format(uint pipe_format) { struct nv30_texture_format *tf = nv30_texture_formats; - char fs[128]; while (tf->defined) { if (tf->pipe == pipe_format) @@ -65,7 +64,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer); struct nv30_texture_format *tf; struct nouveau_stateobj *so; - uint32_t txf, txs , txp; + uint32_t txf, txs; unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; tf = nv30_fragtex_format(pt->format); @@ -97,13 +96,6 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) return NULL; } - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - txp = 0; - } else { - txp = nv30mt->level[0].pitch; - txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? */; - } - txs = tf->swizzle; so = so_new(1, 8, 2); -- cgit v1.2.3 From e791e6f27cd8cac7f3a29a981cee1fd2070c94ed Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 16 Jan 2010 16:57:34 +0100 Subject: nv50: get access to primitive input space Vertex data in geometry programs is located in p[] space. The base address in p[] for vertex i is located in vertex attribute space, i.e. a[i << 2]. This means p[] is always accessed with an address register, and I had to to mess with their allocation once again. Also fixes negative offsets e.g. CONST[ADDR[0].x - 3]. --- src/gallium/drivers/nv50/nv50_program.c | 285 ++++++++++++++++++++++---------- 1 file changed, 197 insertions(+), 88 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 069f815938..83cf693fc1 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -92,6 +92,9 @@ struct nv50_reg { int rhw; /* result hw for FP outputs, or interpolant index */ int acc; /* instruction where this reg is last read (first insn == 1) */ + + int vtx; /* vertex index, for GP inputs (TGSI Dimension.Index) */ + int indirect[2]; /* index into pc->addr, or -1 */ }; #define NV50_MOD_NEG 1 @@ -135,7 +138,6 @@ struct nv50_pc { int immd_nr; struct nv50_reg **addr; int addr_nr; - uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ struct nv50_reg *temp_temp[16]; struct nv50_program_exec *temp_temp_exec[16]; @@ -171,6 +173,8 @@ struct nv50_pc { uint8_t edgeflag_out; }; +static struct nv50_reg *get_address_reg(struct nv50_pc *, struct nv50_reg *); + static INLINE void ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) { @@ -179,7 +183,9 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) reg->hw = hw; reg->mod = 0; reg->rhw = -1; + reg->vtx = -1; reg->acc = 0; + reg->indirect[0] = reg->indirect[1] = -1; } static INLINE unsigned @@ -197,7 +203,8 @@ terminate_mbb(struct nv50_pc *pc) /* remove records of temporary address register values */ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - pc->r_addr[i].rhw = -1; + if (pc->r_addr[i].index < 0) + pc->r_addr[i].acc = 0; } static void @@ -260,6 +267,7 @@ reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) if (reg) { alloc_reg(pc, reg); *ri = *reg; + reg->indirect[0] = reg->indirect[1] = -1; reg->mod = 0; } return ri; @@ -525,11 +533,33 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) static INLINE void set_addr(struct nv50_program_exec *e, struct nv50_reg *a) { + assert(a->type == P_ADDR); + assert(!(e->inst[0] & 0x0c000000)); assert(!(e->inst[1] & 0x00000004)); e->inst[0] |= (a->hw & 3) << 26; - e->inst[1] |= (a->hw >> 2) << 2; + e->inst[1] |= a->hw & 4; +} + +static void +emit_arl(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, uint8_t); + +static void +emit_shl_imm(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, int); + +static void +emit_mov_from_addr(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[1] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + set_addr(e, src); + + emit(pc, e); } static void @@ -548,72 +578,6 @@ emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst, emit(pc, e); } -static struct nv50_reg * -alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) -{ - struct nv50_reg *a_tgsi = NULL, *a = NULL; - int i; - uint8_t avail = ~pc->addr_alloc; - - if (!ref) { - /* allocate for TGSI_FILE_ADDRESS */ - while (avail) { - i = ffs(avail) - 1; - - if (pc->r_addr[i].rhw < 0 || - pc->r_addr[i].acc != pc->insn_cur) { - pc->addr_alloc |= (1 << i); - - pc->r_addr[i].rhw = -1; - pc->r_addr[i].index = i; - return &pc->r_addr[i]; - } - avail &= ~(1 << i); - } - assert(0); - return NULL; - } - - /* Allocate and set an address reg so we can access 'ref'. - * - * If and r_addr->index will be -1 or the hw index the value - * value in rhw is relative to. If rhw < 0, the reg has not - * been initialized or is in use for TGSI_FILE_ADDRESS. - */ - while (avail) { /* only consider regs that are not TGSI */ - i = ffs(avail) - 1; - avail &= ~(1 << i); - - if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) { - /* prefer an usused reg with low hw index */ - a = &pc->r_addr[i]; - continue; - } - if (!a && pc->r_addr[i].acc != pc->insn_cur) - a = &pc->r_addr[i]; - - if (ref->hw - pc->r_addr[i].rhw >= 128) - continue; - - if ((ref->acc >= 0 && pc->r_addr[i].index < 0) || - (ref->acc < 0 && pc->r_addr[i].index == ref->index)) { - pc->r_addr[i].acc = pc->insn_cur; - return &pc->r_addr[i]; - } - } - assert(a); - - if (ref->acc < 0) - a_tgsi = pc->addr[ref->index]; - - emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4); - - a->rhw = ref->hw & ~0x7f; - a->acc = pc->insn_cur; - a->index = a_tgsi ? ref->index : -1; - return a; -} - #define INTERP_LINEAR 0 #define INTERP_FLAT 1 #define INTERP_PERSPECTIVE 2 @@ -657,12 +621,12 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->param.shift = s; e->param.mask = m << (s % 32); - if (src->hw > 127) - set_addr(e, alloc_addr(pc, src)); + if (src->hw < 0 || src->hw > 127) /* need (additional) address reg */ + set_addr(e, get_address_reg(pc, src)); else if (src->acc < 0) { assert(src->type == P_CONST); - set_addr(e, pc->addr[src->index]); + set_addr(e, pc->addr[src->indirect[0]]); } e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); @@ -694,6 +658,12 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) if (src->type == P_ATTR) { set_long(pc, e); e->inst[1] |= 0x00200000; + + if (src->vtx >= 0) { + /* indirect (vertex base + c) load from p[] */ + e->inst[0] |= 0x01800000; + set_addr(e, get_address_reg(pc, src)); + } } alloc_reg(pc, src); @@ -808,6 +778,11 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_ATTR) { set_long(pc, e); e->inst[1] |= 0x00200000; + + if (src->vtx >= 0) { + e->inst[0] |= 0x01800000; /* src from p[] */ + set_addr(e, get_address_reg(pc, src)); + } } else if (src->type == P_CONST || src->type == P_IMMD) { struct nv50_reg *temp = temp_temp(pc, e); @@ -832,13 +807,13 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(e->inst[0] & 0x00800000)); - if (e->inst[0] & 0x01000000) { + if (e->inst[0] & 0x01800000) { struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else { + assert(!(e->inst[0] & 0x00800000)); set_data(pc, src, 0x7f, 16, e); e->inst[0] |= 0x00800000; } @@ -862,13 +837,13 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(e->inst[0] & 0x01000000)); - if (e->inst[0] & 0x00800000) { + if (e->inst[0] & 0x01800000) { struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else { + assert(!(e->inst[0] & 0x01000000)); set_data(pc, src, 0x7f, 32+14, e); e->inst[0] |= 0x01000000; } @@ -997,11 +972,125 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, e->inst[0] |= dst->hw << 2; e->inst[0] |= s << 16; /* shift left */ - set_src_0_restricted(pc, src, e); + set_src_0(pc, src, e); emit(pc, e); } +static boolean +address_reg_suitable(struct nv50_reg *a, struct nv50_reg *r) +{ + if (!r) + return FALSE; + + if (r->vtx != a->vtx) + return FALSE; + if (r->vtx >= 0) + return (r->indirect[1] == a->indirect[1]); + + if (r->hw < a->rhw || (r->hw - a->rhw) >= 128) + return FALSE; + + if (a->index >= 0) + return (a->index == r->indirect[0]); + return (a->indirect[0] == r->indirect[0]); +} + +static void +load_vertex_base(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *a, int shift) +{ + struct nv50_reg mem, *temp; + + ctor_reg(&mem, P_ATTR, -1, dst->vtx); + + assert(dst->type == P_ADDR); + if (!a) { + emit_arl(pc, dst, &mem, 0); + return; + } + temp = alloc_temp(pc, NULL); + + if (shift) { + emit_mov_from_addr(pc, temp, a); + if (shift < 0) + emit_shl_imm(pc, temp, temp, shift); + emit_arl(pc, dst, temp, MAX2(shift, 0)); + } + emit_mov(pc, temp, &mem); + set_addr(pc->p->exec_tail, dst); + + emit_arl(pc, dst, temp, 0); + free_temp(pc, temp); +} + +/* case (ref == NULL): allocate address register for TGSI_FILE_ADDRESS + * case (vtx >= 0, acc >= 0): load vertex base from a[vtx * 4] to $aX + * case (vtx >= 0, acc < 0): load vertex base from s[$aY + vtx * 4] to $aX + * case (vtx < 0, acc >= 0): memory address too high to encode + * case (vtx < 0, acc < 0): get source register for TGSI_FILE_ADDRESS + */ +static struct nv50_reg * +get_address_reg(struct nv50_pc *pc, struct nv50_reg *ref) +{ + int i; + struct nv50_reg *a_ref, *a = NULL; + + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { + if (pc->r_addr[i].acc == 0) + a = &pc->r_addr[i]; /* an unused address reg */ + else + if (address_reg_suitable(&pc->r_addr[i], ref)) { + pc->r_addr[i].acc = pc->insn_cur; + return &pc->r_addr[i]; + } else + if (!a && pc->r_addr[i].index < 0 && + pc->r_addr[i].acc < pc->insn_cur) + a = &pc->r_addr[i]; + } + if (!a) { + /* We'll be able to spill address regs when this + * mess is replaced with a proper compiler ... + */ + NOUVEAU_ERR("out of address regs\n"); + abort(); + return NULL; + } + + /* initialize and reserve for this TGSI instruction */ + a->rhw = 0; + a->index = a->indirect[0] = a->indirect[1] = -1; + a->acc = pc->insn_cur; + + if (!ref) { + a->vtx = -1; + return a; + } + a->vtx = ref->vtx; + + /* now put in the correct value ... */ + + if (ref->vtx >= 0) { + a->indirect[1] = ref->indirect[1]; + + /* For an indirect vertex index, we need to shift address right + * by 2, the address register will contain vtx * 16, we need to + * load from a[vtx * 4]. + */ + load_vertex_base(pc, a, (ref->acc < 0) ? + pc->addr[ref->indirect[1]] : NULL, -2); + } else { + assert(ref->acc < 0 || ref->indirect[0] < 0); + + a->rhw = ref->hw & ~0x7f; + a->indirect[0] = ref->indirect[0]; + a_ref = (ref->acc < 0) ? pc->addr[ref->indirect[0]] : NULL; + + emit_add_addr_imm(pc, a, a_ref, a->rhw * 4); + } + return a; +} + #define NV50_MAX_F32 0x880 #define NV50_MAX_S32 0x08c #define NV50_MAX_U32 0x084 @@ -2171,8 +2260,9 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c]; if (!r) { - r = alloc_addr(pc, NULL); - pc->addr[dst->Register.Index * 4 + c] = r; + r = get_address_reg(pc, NULL); + r->index = dst->Register.Index * 4 + c; + pc->addr[r->index] = r; } assert(r); return r; @@ -2208,6 +2298,18 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, switch (src->Register.File) { case TGSI_FILE_INPUT: r = &pc->attr[src->Register.Index * 4 + c]; + + if (!src->Dimension.Dimension) + break; + r = reg_instance(pc, r); + r->vtx = src->Dimension.Index; + + if (!src->Dimension.Indirect) + break; + swz = tgsi_util_get_src_register_swizzle( + &src->DimIndirect, 0); + r->acc = -1; + r->indirect[1] = src->DimIndirect.Index * 4 + swz; break; case TGSI_FILE_TEMPORARY: r = &pc->temp[src->Register.Index * 4 + c]; @@ -2221,12 +2323,12 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, * use the index field to select the address reg. */ r = reg_instance(pc, NULL); + ctor_reg(r, P_CONST, -1, src->Register.Index * 4 + c); + swz = tgsi_util_get_src_register_swizzle( - &src->Indirect, 0); - ctor_reg(r, P_CONST, - src->Indirect.Index * 4 + swz, - src->Register.Index * 4 + c); + &src->Indirect, 0); r->acc = -1; + r->indirect[0] = src->Indirect.Index * 4 + swz; break; case TGSI_FILE_IMMEDIATE: r = &pc->immd[src->Register.Index * 4 + c]; @@ -2273,7 +2375,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r->mod |= mod & NV50_MOD_I32; assert(r); - if (r->acc >= 0 && r != temp) + if (r->acc >= 0 && r->vtx < 0 && r != temp) return reg_instance(pc, r); /* will clear r->mod */ return r; } @@ -2495,10 +2597,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_ARL: - assert(src[0][0]); temp = temp_temp(pc, NULL); - emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32); - emit_arl(pc, dst[0], temp, 4); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, temp, src[0][c], -1, + CVT_FLOOR | CVT_S32_F32); + emit_arl(pc, dst[c], temp, 4); + } break; case TGSI_OPCODE_BGNLOOP: pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc); @@ -2630,6 +2736,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_ENDSUB: assert(pc->in_subroutine); + terminate_mbb(pc); pc->in_subroutine = FALSE; break; case TGSI_OPCODE_EX2: @@ -3032,6 +3139,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_nop(pc); pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ + + terminate_mbb(pc); break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -3717,7 +3826,7 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) return FALSE; } for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1); + ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1); return TRUE; } -- cgit v1.2.3 From bbb615382d60aa927913b5d1e834727854419438 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 16 Jan 2010 11:36:03 +0100 Subject: nv50: support for geometry programs in nv50_program --- src/gallium/drivers/nv50/nv50_context.c | 8 +- src/gallium/drivers/nv50/nv50_context.h | 9 +- src/gallium/drivers/nv50/nv50_program.c | 422 ++++++++++++++++++------- src/gallium/drivers/nv50/nv50_program.h | 17 +- src/gallium/drivers/nv50/nv50_state_validate.c | 4 +- 5 files changed, 341 insertions(+), 119 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 5997456e4c..22a8498c69 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -67,8 +67,12 @@ nv50_destroy(struct pipe_context *pipe) so_ref(NULL, &nv50->state.vertprog); if (nv50->state.fragprog) so_ref(NULL, &nv50->state.fragprog); - if (nv50->state.programs) - so_ref(NULL, &nv50->state.programs); + if (nv50->state.geomprog) + so_ref(NULL, &nv50->state.geomprog); + if (nv50->state.fp_linkage) + so_ref(NULL, &nv50->state.fp_linkage); + if (nv50->state.gp_linkage) + so_ref(NULL, &nv50->state.gp_linkage); if (nv50->state.vtxfmt) so_ref(NULL, &nv50->state.vtxfmt); if (nv50->state.vtxbuf) diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index cbd4c3ff86..34cdac5a82 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -129,7 +129,9 @@ struct nv50_state { unsigned miptree_nr[PIPE_SHADER_TYPES]; struct nouveau_stateobj *vertprog; struct nouveau_stateobj *fragprog; - struct nouveau_stateobj *programs; + struct nouveau_stateobj *geomprog; + struct nouveau_stateobj *fp_linkage; + struct nouveau_stateobj *gp_linkage; struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; @@ -157,6 +159,7 @@ struct nv50_context { struct pipe_framebuffer_state framebuffer; struct nv50_program *vertprog; struct nv50_program *fragprog; + struct nv50_program *geomprog; struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; @@ -207,7 +210,9 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, /* nv50_program.c */ extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); -extern void nv50_linkage_validate(struct nv50_context *nv50); +extern void nv50_geomprog_validate(struct nv50_context *nv50); +extern void nv50_fp_linkage_validate(struct nv50_context *nv50); +extern void nv50_gp_linkage_validate(struct nv50_context *nv50); extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 83cf693fc1..6647f759b5 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1718,6 +1718,18 @@ emit_ret(struct nv50_pc *pc, int pred, unsigned cc) emit_control_flow(pc, 0x3, pred, cc); } +static void +emit_prim_cmd(struct nv50_pc *pc, unsigned cmd) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xf0000000 | (cmd << 9); + e->inst[1] = 0xc0000000; + set_long(pc, e); + + emit(pc, e); +} + #define QOP_ADD 0 #define QOP_SUBR 1 #define QOP_SUB 2 @@ -2711,6 +2723,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; terminate_mbb(pc); break; + case TGSI_OPCODE_EMIT: + emit_prim_cmd(pc, 1); + break; case TGSI_OPCODE_ENDIF: pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; @@ -2734,6 +2749,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDPRIM: + emit_prim_cmd(pc, 2); + break; case TGSI_OPCODE_ENDSUB: assert(pc->in_subroutine); terminate_mbb(pc); @@ -3449,6 +3467,24 @@ load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a) FREE(one); } +static void +copy_semantic_info(struct nv50_program *p) +{ + unsigned i, id; + + for (i = 0; i < p->cfg.in_nr; ++i) { + id = p->cfg.in[i].id; + p->cfg.in[i].sn = p->info.input_semantic_name[id]; + p->cfg.in[i].si = p->info.input_semantic_index[id]; + } + + for (i = 0; i < p->cfg.out_nr; ++i) { + id = p->cfg.out[i].id; + p->cfg.out[i].sn = p->info.output_semantic_name[id]; + p->cfg.out[i].si = p->info.output_semantic_index[id]; + } +} + static boolean nv50_program_tx_prep(struct nv50_pc *pc) { @@ -3495,13 +3531,13 @@ nv50_program_tx_prep(struct nv50_pc *pc) switch (d->Semantic.Name) { case TGSI_SEMANTIC_BCOLOR: p->cfg.two_side[si].hw = first; - if (p->cfg.io_nr > first) - p->cfg.io_nr = first; + if (p->cfg.out_nr > first) + p->cfg.out_nr = first; break; case TGSI_SEMANTIC_PSIZE: p->cfg.psiz = first; - if (p->cfg.io_nr > first) - p->cfg.io_nr = first; + if (p->cfg.out_nr > first) + p->cfg.out_nr = first; break; case TGSI_SEMANTIC_EDGEFLAG: pc->edgeflag_out = first; @@ -3561,68 +3597,86 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - if (p->type == PIPE_SHADER_VERTEX) { + if (p->type == PIPE_SHADER_VERTEX || p->type == PIPE_SHADER_GEOMETRY) { int rid = 0; - for (i = 0; i < pc->attr_nr * 4; ++i) { - if (pc->attr[i].acc) { - pc->attr[i].hw = rid++; - p->cfg.attr[i / 32] |= 1 << (i % 32); + if (p->type == PIPE_SHADER_GEOMETRY) { + for (i = 0; i < pc->attr_nr; ++i) { + p->cfg.in[i].hw = rid; + p->cfg.in[i].id = i; + + for (c = 0; c < 4; ++c) { + int n = i * 4 + c; + if (!pc->attr[n].acc) + continue; + pc->attr[n].hw = rid++; + p->cfg.in[i].mask |= 1 << c; + } + } + } else { + for (i = 0; i < pc->attr_nr * 4; ++i) { + if (pc->attr[i].acc) { + pc->attr[i].hw = rid++; + p->cfg.attr[i / 32] |= 1 << (i % 32); + } } } for (i = 0, rid = 0; i < pc->result_nr; ++i) { - p->cfg.io[i].hw = rid; - p->cfg.io[i].id = i; + p->cfg.out[i].hw = rid; + p->cfg.out[i].id = i; for (c = 0; c < 4; ++c) { int n = i * 4 + c; if (!pc->result[n].acc) continue; pc->result[n].hw = rid++; - p->cfg.io[i].mask |= 1 << c; + p->cfg.out[i].mask |= 1 << c; } } for (c = 0; c < 2; ++c) if (p->cfg.two_side[c].hw < 0x40) - p->cfg.two_side[c] = p->cfg.io[ + p->cfg.two_side[c] = p->cfg.out[ p->cfg.two_side[c].hw]; if (p->cfg.psiz < 0x40) - p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw; + p->cfg.psiz = p->cfg.out[p->cfg.psiz].hw; + + copy_semantic_info(p); } else if (p->type == PIPE_SHADER_FRAGMENT) { - int rid, aid; + int rid, aid, base; unsigned n = 0, m = pc->attr_nr - flat_nr; pc->allow32 = TRUE; - int base = (TGSI_SEMANTIC_POSITION == - p->info.input_semantic_name[0]) ? 0 : 1; + base = (TGSI_SEMANTIC_POSITION == + p->info.input_semantic_name[0]) ? 0 : 1; /* non-flat interpolants have to be mapped to * the lower hardware IDs, so sort them: */ for (i = 0; i < pc->attr_nr; i++) { if (pc->interp_mode[i] == INTERP_FLAT) - p->cfg.io[m++].id = i; + p->cfg.in[m++].id = i; else { if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) - p->cfg.io[n].linear = TRUE; - p->cfg.io[n++].id = i; + p->cfg.in[n].linear = TRUE; + p->cfg.in[n++].id = i; } } + copy_semantic_info(p); if (!base) /* set w-coordinate mask from perspective interp */ - p->cfg.io[0].mask |= p->cfg.regs[1] >> 24; + p->cfg.in[0].mask |= p->cfg.regs[1] >> 24; aid = popcnt4( /* if fcrd isn't contained in cfg.io */ - base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask); + base ? (p->cfg.regs[1] >> 24) : p->cfg.in[0].mask); for (n = 0; n < pc->attr_nr; ++n) { - p->cfg.io[n].hw = rid = aid; - i = p->cfg.io[n].id; + p->cfg.in[n].hw = rid = aid; + i = p->cfg.in[n].id; if (p->info.input_semantic_name[n] == TGSI_SEMANTIC_FACE) { @@ -3634,15 +3688,15 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (!pc->attr[i * 4 + c].acc) continue; pc->attr[i * 4 + c].rhw = rid++; - p->cfg.io[n].mask |= 1 << c; + p->cfg.in[n].mask |= 1 << c; load_interpolant(pc, &pc->attr[i * 4 + c]); } - aid += popcnt4(p->cfg.io[n].mask); + aid += popcnt4(p->cfg.in[n].mask); } if (!base) - p->cfg.regs[1] |= p->cfg.io[0].mask << 24; + p->cfg.regs[1] |= p->cfg.in[0].mask << 24; m = popcnt4(p->cfg.regs[1] >> 24); @@ -3652,30 +3706,23 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->cfg.regs[1] |= aid - m; if (flat_nr) { - i = p->cfg.io[pc->attr_nr - flat_nr].hw; + i = p->cfg.in[pc->attr_nr - flat_nr].hw; p->cfg.regs[1] |= (i - m) << 16; } else p->cfg.regs[1] |= p->cfg.regs[1] << 16; /* mark color semantic for light-twoside */ - n = 0x40; - for (i = 0; i < pc->attr_nr; i++) { - ubyte si, sn; - - sn = p->info.input_semantic_name[p->cfg.io[i].id]; - si = p->info.input_semantic_index[p->cfg.io[i].id]; - - if (sn == TGSI_SEMANTIC_COLOR) { - p->cfg.two_side[si] = p->cfg.io[i]; - - /* increase colour count */ - p->cfg.regs[0] += popcnt4( - p->cfg.two_side[si].mask) << 16; - - n = MIN2(n, p->cfg.io[i].hw - m); + n = 0x80; + for (i = 0; i < p->cfg.in_nr; i++) { + if (p->cfg.in[i].sn == TGSI_SEMANTIC_COLOR) { + n = MIN2(n, p->cfg.in[i].hw - m); + p->cfg.two_side[p->cfg.in[i].si] = p->cfg.in[i]; + + p->cfg.regs[0] += /* increase colour count */ + popcnt4(p->cfg.in[i].mask) << 16; } } - if (n < 0x40) + if (n < 0x80) p->cfg.regs[0] += n; /* Initialize FP results: @@ -3737,6 +3784,23 @@ free_nv50_pc(struct nv50_pc *pc) FREE(pc); } +static INLINE uint32_t +nv50_map_gs_output_prim(unsigned pprim) +{ + switch (pprim) { + case PIPE_PRIM_POINTS: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_POINTS; + case PIPE_PRIM_LINE_STRIP: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP; + case PIPE_PRIM_TRIANGLE_STRIP: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP; + default: + NOUVEAU_ERR("invalid GS_OUTPUT_PRIMITIVE: %u\n", pprim); + abort(); + return 0; + } +} + static boolean ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) { @@ -3758,17 +3822,44 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + for (i = 0; i < p->info.num_properties; ++i) { + unsigned *data = &p->info.properties[i].data[0]; + + switch (p->info.properties[i].name) { + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + p->cfg.prim_type = nv50_map_gs_output_prim(data[0]); + break; + case TGSI_PROPERTY_GS_MAX_VERTICES: + p->cfg.vert_count = data[0]; + break; + default: + break; + } + } + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; p->cfg.clpd = 0x40; - p->cfg.io_nr = pc->result_nr; + p->cfg.out_nr = pc->result_nr; + break; + case PIPE_SHADER_GEOMETRY: + assert(p->cfg.prim_type); + assert(p->cfg.vert_count); + + p->cfg.psiz = 0x80; + p->cfg.clpd = 0x80; + p->cfg.out_nr = pc->result_nr; + p->cfg.in_nr = pc->attr_nr; + + p->cfg.two_side[0].hw = 0x80; + p->cfg.two_side[1].hw = 0x80; break; case PIPE_SHADER_FRAGMENT: rtype[0] = rtype[1] = P_TEMP; p->cfg.regs[0] = 0x01000004; - p->cfg.io_nr = pc->attr_nr; + p->cfg.in_nr = pc->attr_nr; if (p->info.writes_z) { p->cfg.regs[2] |= 0x00000100; @@ -3988,13 +4079,17 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) if (p->param_nr) { unsigned cb; - uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], + uint32_t *map = pipe_buffer_map(pscreen, + nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - - if (p->type == PIPE_SHADER_VERTEX) + switch (p->type) { + case PIPE_SHADER_GEOMETRY: cb = NV50_CB_PGP; break; + case PIPE_SHADER_FRAGMENT: cb = NV50_CB_PFP; break; + default: cb = NV50_CB_PVP; - else - cb = NV50_CB_PFP; + assert(p->type == PIPE_SHADER_VERTEX); + break; + } nv50_program_upload_data(nv50, map, 0, p->param_nr, cb); pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]); @@ -4088,19 +4183,18 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(5, 8, 2); + so = so_new(5, 7, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); + NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); + NOUVEAU_BO_LOW, 0, 0); so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); so_data (so, p->cfg.attr[0]); so_data (so, p->cfg.attr[1]); so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); - so_data (so, p->cfg.high_result); //8); + so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1); so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_VP_START_ID, 1); so_data (so, 0); /* program start offset */ @@ -4144,42 +4238,74 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } +void +nv50_geomprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->geomprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(6, 7, 2); + so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2); + so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1); + so_data (so, p->cfg.high_temp); + so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1); + so_data (so, p->cfg.high_result); + so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1); + so_data (so, p->cfg.prim_type); + so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1); + so_data (so, p->cfg.vert_count); + so_method(so, tesla, NV50TCL_GP_START_ID, 1); + so_data (so, 0); + so_ref(so, &nv50->state.geomprog); + so_ref(NULL, &so); +} + static uint32_t nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) { + struct nv50_program *vp; struct nv50_program *fp = nv50->fragprog; - struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; uint32_t origin = 0x00000010; + vp = nv50->geomprog ? nv50->geomprog : nv50->vertprog; + /* XXX: this might not work correctly in all cases yet - we'll * just assume that an FP generic input that is not written in * the VP is PointCoord. */ memset(pntc, 0, 8 * sizeof(uint32_t)); - for (i = 0; i < fp->cfg.io_nr; i++) { - uint8_t sn, si; - uint8_t j, k = fp->cfg.io[i].id; - unsigned n = popcnt4(fp->cfg.io[i].mask); + for (i = 0; i < fp->cfg.in_nr; i++) { + unsigned j, n = popcnt4(fp->cfg.in[i].mask); - if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { + if (fp->cfg.in[i].sn != TGSI_SEMANTIC_GENERIC) { m += n; continue; } - for (j = 0; j < vp->info.num_outputs; ++j) { - sn = vp->info.output_semantic_name[j]; - si = vp->info.output_semantic_index[j]; - - if (sn == fp->info.input_semantic_name[k] && - si == fp->info.input_semantic_index[k]) + for (j = 0; j < vp->cfg.out_nr; ++j) + if (vp->cfg.out[j].sn == fp->cfg.in[i].sn && + vp->cfg.out[j].si == fp->cfg.in[i].si) break; - } - if (j < vp->info.num_outputs) { - ubyte mode = - nv50->rasterizer->pipe.sprite_coord_mode[si]; + if (j < vp->cfg.out_nr) { + ubyte mode = nv50->rasterizer->pipe.sprite_coord_mode[ + vp->cfg.out[j].si]; if (mode == PIPE_SPRITE_COORD_NONE) { m += n; @@ -4191,7 +4317,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) /* this is either PointCoord or replaced by sprite coords */ for (c = 0; c < 4; c++) { - if (!(fp->cfg.io[i].mask & (1 << c))) + if (!(fp->cfg.in[i].mask & (1 << c))) continue; pntc[m / 8] |= (c + 1) << ((m % 8) * 4); ++m; @@ -4201,18 +4327,22 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) } static int -nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], - struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) +nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4], + struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) { int c; uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw; - uint8_t *map = (uint8_t *)p_map; + uint8_t *map = (uint8_t *)map32; for (c = 0; c < 4; ++c) { if (mf & 1) { if (fpi->linear == TRUE) lin[mid / 32] |= 1 << (mid % 32); - map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40); + if (mv & 1) + map[mid] = oid; + else + map[mid] = (c == 3) ? (zval + 1) : zval; + ++mid; } oid += mv & 1; @@ -4224,16 +4354,21 @@ nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], } void -nv50_linkage_validate(struct nv50_context *nv50) +nv50_fp_linkage_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *vp = nv50->vertprog; struct nv50_program *fp = nv50->fragprog; struct nouveau_stateobj *so; - struct nv50_sreg4 dummy, *vpo; + struct nv50_sreg4 dummy; int i, n, c, m = 0; uint32_t map[16], lin[4], reg[5], pcrd[8]; + uint8_t zval = 0x40; + if (nv50->geomprog) { + vp = nv50->geomprog; + zval = 0x80; + } memset(map, 0, sizeof(map)); memset(lin, 0, sizeof(lin)); @@ -4245,13 +4380,15 @@ nv50_linkage_validate(struct nv50_context *nv50) dummy.linear = FALSE; dummy.mask = 0xf; /* map all components of HPOS */ - m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); + m = nv50_vec4_map(map, m, zval, lin, &dummy, &vp->cfg.out[0]); dummy.mask = 0x0; if (vp->cfg.clpd < 0x40) { - for (c = 0; c < vp->cfg.clpd_nr; ++c) - map[m++] = vp->cfg.clpd + c; + for (c = 0; c < vp->cfg.clpd_nr; ++c) { + map[m / 4] |= (vp->cfg.clpd + c) << ((m % 4) * 8); + ++m; + } reg[1] = (m << 8); } @@ -4259,35 +4396,30 @@ nv50_linkage_validate(struct nv50_context *nv50) /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ if (nv50->rasterizer->pipe.light_twoside) { - vpo = &vp->cfg.two_side[0]; + struct nv50_sreg4 *vpo = &vp->cfg.two_side[0]; + struct nv50_sreg4 *fpi = &fp->cfg.two_side[0]; - m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]); - m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]); + m = nv50_vec4_map(map, m, zval, lin, &fpi[0], &vpo[0]); + m = nv50_vec4_map(map, m, zval, lin, &fpi[1], &vpo[1]); } reg[0] += m - 4; /* adjust FFC0 id */ reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ - for (i = 0; i < fp->cfg.io_nr; i++) { - ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id]; - ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id]; - - /* position must be mapped first */ - assert(i == 0 || sn != TGSI_SEMANTIC_POSITION); - + for (i = 0; i < fp->cfg.in_nr; i++) { /* maybe even remove these from cfg.io */ - if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE) + if (fp->cfg.in[i].sn == TGSI_SEMANTIC_POSITION || + fp->cfg.in[i].sn == TGSI_SEMANTIC_FACE) continue; - /* VP outputs and vp->cfg.io are in the same order */ - for (n = 0; n < vp->info.num_outputs; ++n) { - if (vp->info.output_semantic_name[n] == sn && - vp->info.output_semantic_index[n] == si) + for (n = 0; n < vp->cfg.out_nr; ++n) + if (vp->cfg.out[n].sn == fp->cfg.in[i].sn && + vp->cfg.out[n].si == fp->cfg.in[i].si) break; - } - vpo = (n < vp->info.num_outputs) ? &vp->cfg.io[n] : &dummy; - m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); + m = nv50_vec4_map(map, m, zval, lin, &fp->cfg.in[i], + (n < vp->cfg.out_nr) ? + &vp->cfg.out[n] : &dummy); } if (nv50->rasterizer->pipe.point_size_per_vertex) { @@ -4295,14 +4427,22 @@ nv50_linkage_validate(struct nv50_context *nv50) reg[3] = (m++ << 4) | 1; } - /* now fill the stateobj */ - so = so_new(7, 57, 0); + /* now fill the stateobj (at most 28 so_data) */ + so = so_new(8, 56, 0); n = (m + 3) / 4; - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); - so_data (so, m); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); - so_datap (so, map, n); + assert(m <= 32); + if (vp->type == PIPE_SHADER_GEOMETRY) { + so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n); + so_datap (so, map, n); + } else { + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); + so_datap (so, map, n); + } so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); so_datap (so, reg, 4); @@ -4322,8 +4462,74 @@ nv50_linkage_validate(struct nv50_context *nv50) so_datap (so, pcrd, 8); } - so_ref(so, &nv50->state.programs); - so_ref(NULL, &so); + so_method(so, tesla, NV50TCL_GP_ENABLE, 1); + so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0); + + so_ref(so, &nv50->state.fp_linkage); + so_ref(NULL, &so); +} + +static int +construct_vp_gp_mapping(uint32_t *map32, int m, + struct nv50_program *vp, struct nv50_program *gp) +{ + uint8_t *map = (uint8_t *)map32; + int i, j, c; + + for (i = 0; i < gp->cfg.in_nr; ++i) { + uint8_t oid, mv = 0, mg = gp->cfg.in[i].mask; + + for (j = 0; j < vp->cfg.out_nr; ++j) { + if (vp->cfg.out[j].sn == gp->cfg.in[i].sn && + vp->cfg.out[j].si == gp->cfg.in[i].si) { + mv = vp->cfg.out[j].mask; + oid = vp->cfg.out[j].hw; + break; + } + } + + for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) { + if (mg & mv & 1) + map[m++] = oid; + else + if (mg & 1) + map[m++] = (c == 3) ? 0x41 : 0x40; + oid += mv & 1; + } + } + return m; +} + +void +nv50_gp_linkage_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *gp = nv50->geomprog; + uint32_t map[16]; + int m = 0; + + if (!gp) { + so_ref(NULL, &nv50->state.gp_linkage); + return; + } + memset(map, 0, sizeof(map)); + + m = construct_vp_gp_mapping(map, m, vp, gp); + + so = so_new(2, 14, 0); + + assert(m <= 32); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + + m = (m + 3) / 4; + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m); + so_datap (so, map, m); + + so_ref(so, &nv50->state.gp_linkage); + so_ref(NULL, &so); } void diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 461fec1d89..e35900aa8d 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -16,11 +16,13 @@ struct nv50_program_exec { }; struct nv50_sreg4 { - uint8_t hw; - uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */ + uint8_t hw; /* hw index, nv50 wants flat FP inputs last */ + uint8_t id; /* tgsi index */ uint8_t mask; boolean linear; + + ubyte sn, si; /* semantic name & index */ }; struct nv50_program { @@ -49,13 +51,18 @@ struct nv50_program { uint32_t regs[4]; /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */ - unsigned io_nr; - struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS]; + unsigned in_nr, out_nr; + struct nv50_sreg4 in[PIPE_MAX_SHADER_INPUTS]; + struct nv50_sreg4 out[PIPE_MAX_SHADER_OUTPUTS]; /* FP colour inputs, VP/GP back colour outputs */ struct nv50_sreg4 two_side[2]; - /* VP only */ + /* GP only */ + unsigned vert_count; + uint8_t prim_type; + + /* VP & GP only */ uint8_t clpd, clpd_nr; uint8_t psiz; uint8_t edgeflag_in; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f83232f43c..7d4e9a9dc8 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -230,7 +230,7 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.fragprog); if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | NV50_NEW_RASTERIZER)) - so_emit(chan, nv50->state.programs); + so_emit(chan, nv50->state.fp_linkage); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) @@ -293,7 +293,7 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | NV50_NEW_RASTERIZER)) - nv50_linkage_validate(nv50); + nv50_fp_linkage_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); -- cgit v1.2.3 From 4c223aaa3b1209006454d600cabc7547fa259a13 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 16 Jan 2010 13:25:55 +0100 Subject: nv50: hook up geometry programs --- src/gallium/drivers/nv50/nv50_context.h | 8 +++-- src/gallium/drivers/nv50/nv50_screen.c | 42 +++++++++++++++--------- src/gallium/drivers/nv50/nv50_screen.h | 4 +-- src/gallium/drivers/nv50/nv50_state.c | 44 ++++++++++++++++++++++++-- src/gallium/drivers/nv50/nv50_state_validate.c | 22 ++++++++++--- src/gallium/drivers/nv50/nv50_tex.c | 2 +- src/gallium/drivers/nv50/nv50_vbo.c | 8 +++++ 7 files changed, 103 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 34cdac5a82..d024be6ea9 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -45,9 +45,11 @@ #define NV50_NEW_VERTPROG_CB (1 << 9) #define NV50_NEW_FRAGPROG (1 << 10) #define NV50_NEW_FRAGPROG_CB (1 << 11) -#define NV50_NEW_ARRAYS (1 << 12) -#define NV50_NEW_SAMPLER (1 << 13) -#define NV50_NEW_TEXTURE (1 << 14) +#define NV50_NEW_GEOMPROG (1 << 12) +#define NV50_NEW_GEOMPROG_CB (1 << 13) +#define NV50_NEW_ARRAYS (1 << 14) +#define NV50_NEW_SAMPLER (1 << 15) +#define NV50_NEW_TEXTURE (1 << 16) struct nv50_blend_stateobj { struct pipe_blend_state pipe; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 28e2b35dea..73205652cc 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -167,7 +167,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) struct nv50_screen *screen = nv50_screen(pscreen); unsigned i; - for (i = 0; i < 2; i++) { + for (i = 0; i < 3; i++) { if (screen->constbuf_parm[i]) nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); } @@ -329,7 +329,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(40, 84, 20); + so = so_new(44, 90, 22); so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); so_data (so, NV50TCL_COND_MODE_ALWAYS); @@ -352,10 +352,11 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); - so_data (so, 0x54); - so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1); - so_data (so, 0x54); + for (i = 0; i < 3; ++i) { + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(i), 1); + so_data (so, 0x54); + } + /* origin is top left (set to 1 for bottom left) */ so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); @@ -370,7 +371,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - for (i = 0; i < 2; i++) { + for (i = 0; i < 3; i++) { ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { @@ -406,22 +407,33 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000001 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000021 | (NV50_CB_PMISC << 12)); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000031 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PVP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000101 | (NV50_CB_PVP << 12)); so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PGP << 16) | 0x00000800); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000121 | (NV50_CB_PGP << 12)); + + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PFP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index a038a4e3c2..7a155ca9c4 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -18,10 +18,10 @@ struct nv50_screen { struct nouveau_notifier *sync; struct nouveau_bo *constbuf_misc[1]; - struct nouveau_bo *constbuf_parm[2]; + struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES]; struct nouveau_resource *immd_heap[1]; - struct nouveau_resource *parm_heap[2]; + struct nouveau_resource *parm_heap[PIPE_SHADER_TYPES]; struct nouveau_bo *tic; struct nouveau_bo *tsc; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 1bbbbdd5f0..6ab33be663 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -531,7 +531,7 @@ nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv50_program *p = hwcso; nv50_program_destroy(nv50, p); - FREE((void*)p->pipe.tokens); + FREE((void *)p->pipe.tokens); FREE(p); } @@ -563,7 +563,39 @@ nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv50_program *p = hwcso; nv50_program_destroy(nv50, p); - FREE((void*)p->pipe.tokens); + FREE((void *)p->pipe.tokens); + FREE(p); +} + +static void * +nv50_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_GEOMETRY; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_GEOMPROG; +} + +static void +nv50_gp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void *)p->pipe.tokens); FREE(p); } @@ -596,6 +628,10 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (shader == PIPE_SHADER_FRAGMENT) { nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf; nv50->dirty |= NV50_NEW_FRAGPROG_CB; + } else + if (shader == PIPE_SHADER_GEOMETRY) { + nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf; + nv50->dirty |= NV50_NEW_GEOMPROG_CB; } } @@ -696,6 +732,10 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.bind_fs_state = nv50_fp_state_bind; nv50->pipe.delete_fs_state = nv50_fp_state_delete; + nv50->pipe.create_gs_state = nv50_gp_state_create; + nv50->pipe.bind_gs_state = nv50_gp_state_bind; + nv50->pipe.delete_gs_state = nv50_gp_state_delete; + nv50->pipe.set_blend_color = nv50_set_blend_color; nv50->pipe.set_clip_state = nv50_set_clip_state; nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 7d4e9a9dc8..fcd07b59cd 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -199,6 +199,8 @@ nv50_state_emit(struct nv50_context *nv50) nv50->state.dirty |= NV50_NEW_VERTPROG; if (nv50->state.fragprog) nv50->state.dirty |= NV50_NEW_FRAGPROG; + if (nv50->state.geomprog) + nv50->state.dirty |= NV50_NEW_GEOMPROG; if (nv50->state.rast) nv50->state.dirty |= NV50_NEW_RASTERIZER; if (nv50->state.blend_colour) @@ -228,9 +230,14 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); + if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog) + so_emit(chan, nv50->state.geomprog); if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_RASTERIZER)) + NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) so_emit(chan, nv50->state.fp_linkage); + if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG)) + && nv50->state.gp_linkage) + so_emit(chan, nv50->state.gp_linkage); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) @@ -291,10 +298,16 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB)) + nv50_geomprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_RASTERIZER)) + NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) nv50_fp_linkage_validate(nv50); + if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG)) + nv50_gp_linkage_validate(nv50); + if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); @@ -400,8 +413,9 @@ viewport_uptodate: for (i = 0; i < PIPE_SHADER_TYPES; ++i) nr += nv50->sampler_nr[i]; - so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES - + nr * 8, PIPE_SHADER_TYPES * 2); + so = so_new(1 + 5 * PIPE_SHADER_TYPES, + 1 + 19 * PIPE_SHADER_TYPES + nr * 8, + PIPE_SHADER_TYPES * 2); nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index bef548b728..871536dca9 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -155,7 +155,7 @@ static boolean nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so, unsigned p) { - static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 }; + static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2, 1 }; struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_grobj *tesla = nv50->screen->tesla; diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f2e510fba6..89a94d2fe8 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -55,6 +55,14 @@ nv50_prim(unsigned mode) case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; + case PIPE_PRIM_LINES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY; default: break; } -- cgit v1.2.3 From 0ae3db173455a77355b4e0ee6deeb862803e3d1c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 16 Jan 2010 15:39:32 +0100 Subject: nv50: handle all and more system values --- src/gallium/drivers/nv50/nv50_program.c | 119 ++++++++++++++++++++++++++++---- src/gallium/drivers/nv50/nv50_program.h | 3 + 2 files changed, 110 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 6647f759b5..acd695b39b 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -138,6 +138,8 @@ struct nv50_pc { int immd_nr; struct nv50_reg **addr; int addr_nr; + struct nv50_reg *sysval; + int sysval_nr; struct nv50_reg *temp_temp[16]; struct nv50_program_exec *temp_temp_exec[16]; @@ -2281,6 +2283,10 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) } case TGSI_FILE_NULL: return NULL; + case TGSI_FILE_SYSTEM_VALUE: + assert(pc->sysval[dst->Register.Index].type == P_RESULT); + assert(c == 0); + return &pc->sysval[dst->Register.Index]; default: break; } @@ -2351,6 +2357,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = pc->addr[src->Register.Index * 4 + c]; assert(r); break; + case TGSI_FILE_SYSTEM_VALUE: + assert(c == 0); + r = &pc->sysval[src->Register.Index]; + break; default: assert(0); break; @@ -3454,17 +3464,22 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) * value of 0 for back-facing, and 0xffffffff for front-facing. */ static void -load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a) +load_frontfacing(struct nv50_pc *pc, struct nv50_reg *sv) { - struct nv50_reg *one = alloc_immd(pc, 1.0f); + struct nv50_reg *temp = alloc_temp(pc, NULL); + int r_pred = 0; - assert(a->rhw == -1); - alloc_reg(pc, a); /* do this before rhw is set */ - a->rhw = 255; - load_interpolant(pc, a); - emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND); + temp->rhw = 255; + emit_interp(pc, temp, NULL, INTERP_FLAT); - FREE(one); + emit_cvt(pc, sv, temp, r_pred, CVT_ABS | CVT_F32_S32); + + emit_not(pc, temp, temp); + set_pred(pc, 0x2, r_pred, pc->p->exec_tail); + emit_cvt(pc, sv, temp, -1, CVT_F32_S32); + set_pred(pc, 0x2, r_pred, pc->p->exec_tail); + + free_temp(pc, temp); } static void @@ -3491,7 +3506,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) struct tgsi_parse_context tp; struct nv50_program *p = pc->p; boolean ret = FALSE; - unsigned i, c, flat_nr = 0; + unsigned i, c, instance_id, vertex_id, flat_nr = 0; tgsi_parse_init(&tp, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&tp)) { @@ -3577,6 +3592,37 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->interp_mode[i] = mode; } break; + case TGSI_FILE_SYSTEM_VALUE: + assert(d->Declaration.Semantic); + switch (d->Semantic.Name) { + case TGSI_SEMANTIC_FACE: + assert(p->type == PIPE_SHADER_FRAGMENT); + load_frontfacing(pc, + &pc->sysval[first]); + break; + case TGSI_SEMANTIC_INSTANCEID: + assert(p->type == PIPE_SHADER_VERTEX); + instance_id = first; + p->cfg.regs[0] |= (1 << 4); + break; + case TGSI_SEMANTIC_PRIMID: + assert(p->type != PIPE_SHADER_VERTEX); + p->cfg.prim_id = first; + break; + /* + case TGSI_SEMANTIC_PRIMIDIN: + assert(p->type == PIPE_SHADER_GEOMETRY); + pc->sysval[first].hw = 6; + p->cfg.regs[0] |= (1 << 8); + break; + case TGSI_SEMANTIC_VERTEXID: + assert(p->type == PIPE_SHADER_VERTEX); + vertex_id = first; + p->cfg.regs[0] |= (1 << 12) | (1 << 0); + break; + */ + } + break; case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_SAMPLER: @@ -3620,6 +3666,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->cfg.attr[i / 32] |= 1 << (i % 32); } } + if (p->cfg.regs[0] & (1 << 0)) + pc->sysval[vertex_id].hw = rid++; + if (p->cfg.regs[0] & (1 << 4)) + pc->sysval[instance_id].hw = rid++; } for (i = 0, rid = 0; i < pc->result_nr; ++i) { @@ -3634,6 +3684,12 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->cfg.out[i].mask |= 1 << c; } } + if (p->cfg.prim_id < 0x40) { + /* GP has to write to PrimitiveID */ + ctor_reg(&pc->sysval[p->cfg.prim_id], + P_RESULT, p->cfg.prim_id, rid); + p->cfg.prim_id = rid++; + } for (c = 0; c < 2; ++c) if (p->cfg.two_side[c].hw < 0x40) @@ -3725,6 +3781,14 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (n < 0x80) p->cfg.regs[0] += n; + if (p->cfg.prim_id < 0x40) { + pc->sysval[p->cfg.prim_id].rhw = rid++; + emit_interp(pc, &pc->sysval[p->cfg.prim_id], NULL, + INTERP_FLAT); + /* increase FP_INTERPOLANT_CTRL_COUNT */ + p->cfg.regs[1] += 1; + } + /* Initialize FP results: * FragDepth is always first TGSI and last hw output */ @@ -3778,6 +3842,8 @@ free_nv50_pc(struct nv50_pc *pc) FREE(pc->attr); if (pc->temp) FREE(pc->temp); + if (pc->sysval) + FREE(pc->sysval); if (pc->insn_pos) FREE(pc->insn_pos); @@ -3814,11 +3880,13 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1; assert(pc->addr_nr <= 2); + pc->sysval_nr = p->info.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; p->cfg.high_temp = 4; p->cfg.two_side[0].hw = 0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.prim_id = 0x40; p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; @@ -3849,6 +3917,7 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) p->cfg.psiz = 0x80; p->cfg.clpd = 0x80; + p->cfg.prim_id = 0x80; p->cfg.out_nr = pc->result_nr; p->cfg.in_nr = pc->attr_nr; @@ -3919,6 +3988,15 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) for (i = 0; i < NV50_SU_MAX_ADDR; ++i) ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1); + if (pc->sysval_nr) { + pc->sysval = CALLOC(pc->sysval_nr, sizeof(struct nv50_reg *)); + if (!pc->sysval) + return FALSE; + /* will only ever use SYSTEM_VALUE[i].x (hopefully) */ + for (i = 0; i < pc->sysval_nr; ++i) + ctor_reg(&pc->sysval[i], rtype[0], i, -1); + } + return TRUE; } @@ -4362,7 +4440,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) struct nouveau_stateobj *so; struct nv50_sreg4 dummy; int i, n, c, m = 0; - uint32_t map[16], lin[4], reg[5], pcrd[8]; + uint32_t map[16], lin[4], reg[6], pcrd[8]; uint8_t zval = 0x40; if (nv50->geomprog) { @@ -4375,6 +4453,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) reg[1] = 0x00000004; /* low and high clip distance map ids */ reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */ reg[3] = 0x00000000; /* point size map id & enable */ + reg[5] = 0x00000000; /* primitive ID map slot */ reg[0] = fp->cfg.regs[0]; /* colour semantic reg */ reg[4] = fp->cfg.regs[1]; /* interpolant info */ @@ -4421,6 +4500,13 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) (n < vp->cfg.out_nr) ? &vp->cfg.out[n] : &dummy); } + /* PrimitiveID either is replaced by the system value, or + * written by the geometry shader into an output register + */ + if (fp->cfg.prim_id < 0x40) { + map[m / 4] |= vp->cfg.prim_id << ((m % 4) * 8); + reg[5] = m++; + } if (nv50->rasterizer->pipe.point_size_per_vertex) { map[m / 4] |= vp->cfg.psiz << ((m % 4) * 8); @@ -4428,7 +4514,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) } /* now fill the stateobj (at most 28 so_data) */ - so = so_new(8, 56, 0); + so = so_new(10, 54, 0); n = (m + 3) / 4; assert(m <= 32); @@ -4438,6 +4524,12 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n); so_datap (so, map, n); } else { + so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); + so_data (so, vp->cfg.regs[0]); + + so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1); + so_data (so, reg[5]); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); so_data (so, m); so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); @@ -4518,7 +4610,10 @@ nv50_gp_linkage_validate(struct nv50_context *nv50) m = construct_vp_gp_mapping(map, m, vp, gp); - so = so_new(2, 14, 0); + so = so_new(3, 24 - 3, 0); + + so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); + so_data (so, vp->cfg.regs[0] | gp->cfg.regs[0]); assert(m <= 32); so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index e35900aa8d..1e3ad6bff0 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -66,6 +66,9 @@ struct nv50_program { uint8_t clpd, clpd_nr; uint8_t psiz; uint8_t edgeflag_in; + + /* FP & GP only */ + uint8_t prim_id; } cfg; }; -- cgit v1.2.3 From 83387cb6c42e283440e0d1e592fbe3c93a6622c7 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 16 Jan 2010 17:58:35 +0100 Subject: nv50: fix tile flags for scanout tex usage --- src/gallium/drivers/nv50/nv50_miptree.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index cecb1efc90..dc8364ced7 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -92,12 +92,23 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) case PIPE_FORMAT_Z24S8_UNORM: tile_flags = 0x1800; break; + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x6c00; + break; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8Z24_UNORM: tile_flags = 0x2800; break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + tile_flags = 0x7400; + break; default: - tile_flags = 0x7000; + if ((pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) && + util_format_get_blocksizebits(pt->format) == 32) + tile_flags = 0x7a00; + else + tile_flags = 0x7000; break; } -- cgit v1.2.3