From d0b55120beb4848be5e7ef24f0301f8397baa8be Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 23 Dec 2009 19:37:18 +0100 Subject: gallium: Add interfaces needed for instanced drawing. --- src/gallium/include/pipe/p_context.h | 16 ++++++++++++++++ src/gallium/include/pipe/p_shader_tokens.h | 3 ++- src/gallium/include/pipe/p_state.h | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 11bcdc0a24..d5d1e0e76b 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -69,6 +69,22 @@ struct pipe_context { unsigned indexSize, unsigned mode, unsigned start, unsigned count); + boolean (*draw_arrays_instanced)(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + boolean (*draw_elements_instanced)(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + /* XXX: this is (probably) a temporary entrypoint, as the range * information should be available from the vertex_buffer state. * Using this to quickly evaluate a specialized path in the draw diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 5da85bbbc2..d79124828f 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -130,7 +130,8 @@ struct tgsi_declaration_range #define TGSI_SEMANTIC_NORMAL 6 #define TGSI_SEMANTIC_FACE 7 #define TGSI_SEMANTIC_EDGEFLAG 8 -#define TGSI_SEMANTIC_COUNT 9 /**< number of semantic values */ +#define TGSI_SEMANTIC_INSTANCEID 9 +#define TGSI_SEMANTIC_COUNT 10 /**< number of semantic values */ struct tgsi_declaration_semantic { diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 60e96b98de..4d38bbcc32 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -364,6 +364,7 @@ struct pipe_vertex_buffer unsigned stride; /**< stride to same attrib in next vertex, in bytes */ unsigned max_index; /**< number of vertices in this buffer */ unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */ + unsigned instance_divisor; /**< instance data rate divisor, 0 means per-vertex data */ struct pipe_buffer *buffer; /**< the actual buffer */ }; -- cgit v1.2.3 From f7d1689cfa8e3eb2aea4b86031510323b4d13110 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 29 Dec 2009 19:18:54 +0100 Subject: gallium: Move instance_divisor field from vertex_buffer to vertex_element. --- src/gallium/include/pipe/p_state.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 4d38bbcc32..4d25f601f4 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -364,7 +364,6 @@ struct pipe_vertex_buffer unsigned stride; /**< stride to same attrib in next vertex, in bytes */ unsigned max_index; /**< number of vertices in this buffer */ unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */ - unsigned instance_divisor; /**< instance data rate divisor, 0 means per-vertex data */ struct pipe_buffer *buffer; /**< the actual buffer */ }; @@ -377,6 +376,11 @@ struct pipe_vertex_element /** Offset of this attribute, in bytes, from the start of the vertex */ unsigned src_offset; + /** Instance data rate divisor. 0 means this is per-vertex data, + * n means per-instance data used for n consecutive instances (n > 0). + */ + unsigned instance_divisor; + /** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does * this attribute live in? */ -- cgit v1.2.3 From 7124fa16efe0f8ffb402bcd182f276032bed378d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 29 Dec 2009 22:49:31 +0100 Subject: translate: Fix translate_key_compare(). Sizes of translate keys must also match. --- src/gallium/auxiliary/translate/translate.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 34526eb061..1afdf194b3 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -103,8 +103,13 @@ static INLINE int translate_keysize( const struct translate_key *key ) static INLINE int translate_key_compare( const struct translate_key *a, const struct translate_key *b ) { - int keysize = translate_keysize(a); - return memcmp(a, b, keysize); + int keysize_a = translate_keysize(a); + int keysize_b = translate_keysize(b); + + if (keysize_a != keysize_b) { + return keysize_a - keysize_b; + } + return memcmp(a, b, keysize_a); } -- cgit v1.2.3 From 7ca0ce38340144794267609646048b3820d594ab Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 29 Dec 2009 23:21:01 +0100 Subject: Implement draw_arrays_instanced() in softpipe. Modify the translate module to respect instance divisors and accept instance id as a parameter to calculate input vertex offset. --- src/gallium/auxiliary/draw/draw_context.h | 8 ++++ src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 +- src/gallium/auxiliary/draw/draw_private.h | 2 + src/gallium/auxiliary/draw/draw_pt.c | 23 ++++++++++ src/gallium/auxiliary/draw/draw_pt_emit.c | 3 ++ src/gallium/auxiliary/draw/draw_pt_fetch.c | 3 ++ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 ++ src/gallium/auxiliary/draw/draw_vs.h | 1 + src/gallium/auxiliary/draw/draw_vs_varient.c | 6 +++ src/gallium/auxiliary/tgsi/tgsi_dump.c | 3 +- src/gallium/auxiliary/translate/translate.h | 2 + .../auxiliary/translate/translate_generic.c | 16 +++++-- src/gallium/auxiliary/translate/translate_sse.c | 1 + src/gallium/drivers/softpipe/sp_context.c | 1 + src/gallium/drivers/softpipe/sp_draw_arrays.c | 51 ++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_state.h | 8 ++++ src/gallium/drivers/svga/svga_state_vs.c | 1 + 17 files changed, 130 insertions(+), 5 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 465b8f10c6..c0f6a61411 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -151,6 +151,14 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count); +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + void draw_flush(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 1a5269c0de..bb8a8ff491 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf, /* Note: we really do want data[0] here, not data[pos]: */ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); - vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr); if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); @@ -275,6 +275,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 3850cede1e..129d919a84 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -226,6 +226,8 @@ struct draw_context unsigned reduced_prim; + unsigned instance_id; + void *driver_private; }; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 2801dbafe4..1217b9e5d7 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -312,5 +312,28 @@ draw_arrays(struct draw_context *draw, unsigned prim, #endif /* drawing done here: */ + draw->instance_id = 0; draw_pt_arrays(draw, prim, start, count); } + +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + unsigned reduced_prim = u_reduced_prim(mode); + unsigned instance; + + if (reduced_prim != draw->reduced_prim) { + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); + draw->reduced_prim = reduced_prim; + } + + for (instance = 0; instance < instanceCount; instance++) { + draw->instance_id = instance + startInstance; + draw_pt_arrays(draw, mode, start, count); + } +} diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 064e16c295..d0abeb9336 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -125,6 +125,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; @@ -204,6 +205,7 @@ void draw_pt_emit( struct pt_emit *emit, translate->run( translate, 0, vertex_count, + draw->instance_id, hw_verts ); render->unmap_vertices( render, @@ -263,6 +265,7 @@ void draw_pt_emit_linear(struct pt_emit *emit, translate->run(translate, 0, count, + draw->instance_id, hw_verts); if (0) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 305bfef435..e8174a2971 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -81,6 +81,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; key.element[nr].input_offset = 0; + key.element[nr].instance_divisor = 0; key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].output_offset = dst_offset; dst_offset += 1 * sizeof(float); @@ -100,6 +101,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, key.element[nr].input_format = draw->pt.vertex_element[i].src_format; key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; + key.element[nr].instance_divisor = draw->pt.vertex_element[i].instance_divisor; key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[nr].output_offset = dst_offset; @@ -183,6 +185,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, translate->run( translate, start, count, + draw->instance_id, verts ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e7fe6b3b76..40bfc0fbb2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -169,6 +169,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, key.element[i].input_format = input_format; key.element[i].input_buffer = input_buffer; key.element[i].input_offset = input_offset; + key.element[i].instance_divisor = src->instance_divisor; key.element[i].output_format = output_format; key.element[i].output_offset = dst_offset; @@ -314,6 +315,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); if (0) { @@ -374,6 +376,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index e3b807ebd0..00036cfe68 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -43,6 +43,7 @@ struct draw_varient_input enum pipe_format format; unsigned buffer; unsigned offset; + unsigned instance_divisor; }; struct draw_varient_output diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 7ee567d478..4cc080f803 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -180,6 +180,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -202,6 +203,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->fetch->run( vsvg->fetch, start, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, @@ -238,6 +240,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -283,6 +286,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; + fetch.element[i].instance_divisor = 0; fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; fetch.element[i].output_offset = i * 4 * sizeof(float); assert(fetch.element[i].output_offset < fetch.output_stride); @@ -297,6 +301,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit.element[i].input_buffer = 0; emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); + emit.element[i].instance_divisor = 0; emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); emit.element[i].output_offset = key->element[i].out.offset; assert(emit.element[i].input_offset <= fetch.output_stride); @@ -305,6 +310,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].input_buffer = 1; emit.element[i].input_offset = 0; + emit.element[i].instance_divisor = 0; emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].output_offset = key->element[i].out.offset; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 5e7e5d2ff9..4391ca75d1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -122,7 +122,8 @@ static const char *semantic_names[] = "GENERIC", "NORMAL", "FACE", - "EDGEFLAG" + "EDGEFLAG", + "INSTANCEID" }; static const char *immediate_type_names[] = diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 1afdf194b3..fb298471b8 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -50,6 +50,7 @@ struct translate_element enum pipe_format output_format; unsigned input_buffer:8; unsigned input_offset:24; + unsigned instance_divisor; unsigned output_offset; }; @@ -79,6 +80,7 @@ struct translate { void (PIPE_CDECL *run)( struct translate *, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer); }; diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 266e7ee81e..0fa9927409 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -49,6 +49,7 @@ struct translate_generic { fetch_func fetch; unsigned buffer; unsigned input_offset; + unsigned instance_divisor; emit_func emit; unsigned output_offset; @@ -607,6 +608,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, static void PIPE_CDECL generic_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -622,13 +624,20 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - - const char *src = (tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt); + const char *src; char *dst = (vert + tg->attrib[attr].output_offset); + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + tg->attrib[attr].fetch( src, data ); if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", @@ -687,6 +696,7 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); tg->attrib[i].buffer = key->element[i].input_buffer; tg->attrib[i].input_offset = key->element[i].input_offset; + tg->attrib[i].instance_divisor = key->element[i].instance_divisor; tg->attrib[i].emit = get_emit_func(key->element[i].output_format); tg->attrib[i].output_offset = key->element[i].output_offset; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index b62db8d8f3..edd0be17f0 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -637,6 +637,7 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, static void PIPE_CDECL translate_sse_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 2a33587b5a..406414ae3d 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -238,6 +238,7 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; + softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 518ef8806e..6a593fb06a 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -184,3 +184,54 @@ softpipe_draw_elements(struct pipe_context *pipe, 0, 0xffffffff, mode, start, count ); } + +boolean +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + sp->reduced_api_prim = u_reduced_prim(mode); + + if (sp->dirty) { + softpipe_update_derived(sp); + } + + softpipe_map_transfers(sp); + softpipe_map_constant_buffers(sp); + + /* Map vertex buffers */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + void *buf; + + buf = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + + draw_set_mapped_element_buffer_range(draw, 0, start, + start + count - 1, NULL); + + /* draw! */ + draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); + + /* unmap vertex/index buffers - will cause draw module to flush */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); + } + + /* Note: leave drawing surfaces mapped */ + softpipe_unmap_constant_buffers(sp); + + sp->dirty_render_cache = TRUE; + + return TRUE; +} diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 26d5c3fbb2..13935fd799 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -189,6 +189,14 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count); +boolean +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + void softpipe_map_transfers(struct softpipe_context *sp); diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 44b7ceb4fa..114de1a49e 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -198,6 +198,7 @@ static int update_zero_stride( struct svga_context *svga, key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[0].input_buffer = vel->vertex_buffer_index; key.element[0].input_offset = vel->src_offset; + key.element[0].instance_divisor = vel->instance_divisor; key.element[0].output_offset = const_idx * 4 * sizeof(float); translate_key_sanitize(&key); -- cgit v1.2.3 From 0f185cb027b12356895e424506846fa449e7c47f Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 14:20:29 +0100 Subject: rtasm: Add x86_div(). --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 7 +++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 1 + 2 files changed, 8 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 1acf3c373e..f675427d98 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -673,6 +673,13 @@ void x86_and( struct x86_function *p, emit_op_modrm( p, 0x23, 0x21, dst, src ); } +void x86_div( struct x86_function *p, + struct x86_reg src ) +{ + assert(src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); +} + /*********************************************************************** diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 731a651796..f7612d416a 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -244,6 +244,7 @@ void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_sahf( struct x86_function *p ); +void x86_div( struct x86_function *p, struct x86_reg src ); void x86_cdecl_caller_push_regs( struct x86_function *p ); -- cgit v1.2.3 From 09c0287b84725098c0b365668231ddf00487c84c Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 14:23:12 +0100 Subject: translate: Implement instancing for linear SSE run. --- src/gallium/auxiliary/translate/translate_sse.c | 154 ++++++++++++++++++------ 1 file changed, 120 insertions(+), 34 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index edd0be17f0..ddfa4c6b4a 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -49,6 +49,7 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ); typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, @@ -59,7 +60,12 @@ typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, struct translate_buffer { const void *base_ptr; unsigned stride; - void *ptr; /* updated per vertex */ +}; + +struct translate_buffer_varient { + unsigned buffer_index; + unsigned instance_divisor; + void *ptr; /* updated either per vertex or per instance */ }; @@ -81,6 +87,16 @@ struct translate_sse { struct translate_buffer buffer[PIPE_MAX_ATTRIBS]; unsigned nr_buffers; + /* Multiple buffer varients can map to a single buffer. */ + struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS]; + unsigned nr_buffer_varients; + + /* Multiple elements can map to a single buffer varient. */ + unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS]; + + boolean use_instancing; + unsigned instance_id; + run_func gen_run; run_elts_func gen_run_elts; @@ -360,31 +376,59 @@ static boolean init_inputs( struct translate_sse *p, { unsigned i; if (linear) { - for (i = 0; i < p->nr_buffers; i++) { + struct x86_reg instance_id = x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id)); + + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; + struct translate_buffer *buffer = &p->buffer[varient->buffer_index]; struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].stride)); + get_offset(p, &buffer->stride)); struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].ptr)); + get_offset(p, &varient->ptr)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].base_ptr)); + get_offset(p, &buffer->base_ptr)); struct x86_reg elt = p->idx_EBX; - struct x86_reg tmp = p->tmp_EAX; - + struct x86_reg tmp_EAX = p->tmp_EAX; /* Calculate pointer to first attrib: + * base_ptr + stride * index, where index depends on instance divisor */ - x86_mov(p->func, tmp, buf_stride); - x86_imul(p->func, tmp, elt); - x86_add(p->func, tmp, buf_base_ptr); + if (varient->instance_divisor) { + /* Our index is instance ID divided by instance divisor. + */ + x86_mov(p->func, tmp_EAX, instance_id); + + if (varient->instance_divisor != 1) { + struct x86_reg tmp_EDX = p->machine_EDX; + struct x86_reg tmp_ECX = p->outbuf_ECX; + + /* TODO: Add x86_shr() to rtasm and use it whenever + * instance divisor is power of two. + */ + + x86_push(p->func, tmp_EDX); + x86_push(p->func, tmp_ECX); + x86_xor(p->func, tmp_EDX, tmp_EDX); + x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor); + x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ + x86_pop(p->func, tmp_ECX); + x86_pop(p->func, tmp_EDX); + } + } else { + x86_mov(p->func, tmp_EAX, elt); + } + x86_imul(p->func, tmp_EAX, buf_stride); + x86_add(p->func, tmp_EAX, buf_base_ptr); /* In the linear case, keep the buffer pointer instead of the * index number. */ - if (p->nr_buffers == 1) - x86_mov( p->func, elt, tmp ); + if (p->nr_buffer_varients == 1) + x86_mov(p->func, elt, tmp_EAX); else - x86_mov( p->func, buf_ptr, tmp ); + x86_mov(p->func, buf_ptr, tmp_EAX); } } @@ -394,31 +438,32 @@ static boolean init_inputs( struct translate_sse *p, static struct x86_reg get_buffer_ptr( struct translate_sse *p, boolean linear, - unsigned buf_idx, + unsigned var_idx, struct x86_reg elt ) { - if (linear && p->nr_buffers == 1) { + if (linear && p->nr_buffer_varients == 1) { return p->idx_EBX; } else if (linear) { struct x86_reg ptr = p->tmp_EAX; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].ptr)); + get_offset(p, &p->buffer_varient[var_idx].ptr)); x86_mov(p->func, ptr, buf_ptr); return ptr; } else { struct x86_reg ptr = p->tmp_EAX; + const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx]; struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].stride)); + get_offset(p, &p->buffer[varient->buffer_index].stride)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].base_ptr)); + get_offset(p, &p->buffer[varient->buffer_index].base_ptr)); @@ -436,28 +481,33 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, static boolean incr_inputs( struct translate_sse *p, boolean linear ) { - if (linear && p->nr_buffers == 1) { + if (linear && p->nr_buffer_varients == 1) { struct x86_reg stride = x86_make_disp(p->machine_EDX, get_offset(p, &p->buffer[0].stride)); - x86_add(p->func, p->idx_EBX, stride); - sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + if (p->buffer_varient[0].instance_divisor == 0) { + x86_add(p->func, p->idx_EBX, stride); + sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + } } else if (linear) { unsigned i; /* Is this worthwhile?? */ - for (i = 0; i < p->nr_buffers; i++) { + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].ptr)); + get_offset(p, &varient->ptr)); struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].stride)); + get_offset(p, &p->buffer[varient->buffer_index].stride)); - x86_mov(p->func, p->tmp_EAX, buf_ptr); - x86_add(p->func, p->tmp_EAX, buf_stride); - if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); - x86_mov(p->func, buf_ptr, p->tmp_EAX); + if (varient->instance_divisor == 0) { + x86_mov(p->func, p->tmp_EAX, buf_ptr); + x86_add(p->func, p->tmp_EAX, buf_stride); + if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); + x86_mov(p->func, buf_ptr, p->tmp_EAX); + } } } else { @@ -514,7 +564,18 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1)); x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2)); x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3)); - x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 4)); + x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5)); + + /* Load instance ID. + */ + if (p->use_instancing) { + x86_mov(p->func, + p->tmp_EAX, + x86_fn_arg(p->func, 4)); + x86_mov(p->func, + x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), + p->tmp_EAX); + } /* Get vertex count, compare to zero */ @@ -531,17 +592,18 @@ static boolean build_vertex_emit( struct translate_sse *p, label = x86_get_label(p->func); { struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX); - int last_vb = -1; + int last_varient = -1; struct x86_reg vb; for (j = 0; j < p->translate.key.nr_elements; j++) { const struct translate_element *a = &p->translate.key.element[j]; + unsigned varient = p->element_to_buffer_varient[j]; /* Figure out source pointer address: */ - if (a->input_buffer != last_vb) { - last_vb = a->input_buffer; - vb = get_buffer_ptr(p, linear, a->input_buffer, elt); + if (varient != last_varient) { + last_varient = varient; + vb = get_buffer_ptr(p, linear, varient, elt); } if (!translate_attr( p, a, @@ -645,6 +707,7 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate, p->gen_run( translate, start, count, + instance_id, output_buffer ); } @@ -667,9 +730,32 @@ struct translate *translate_sse2_create( const struct translate_key *key ) p->translate.run_elts = translate_sse_run_elts; p->translate.run = translate_sse_run; - for (i = 0; i < key->nr_elements; i++) + for (i = 0; i < key->nr_elements; i++) { + unsigned j; + p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 ); + if (key->element[i].instance_divisor) { + p->use_instancing = TRUE; + } + + /* + * Map vertex element to vertex buffer varient. + */ + for (j = 0; j < p->nr_buffer_varients; j++) { + if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && + p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { + break; + } + } + if (j == p->nr_buffer_varients) { + p->buffer_varient[j].buffer_index = key->element[i].input_buffer; + p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; + p->nr_buffer_varients++; + } + p->element_to_buffer_varient[i] = j; + } + if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); if (!build_vertex_emit(p, &p->linear_func, TRUE)) -- cgit v1.2.3 From 76e53923ba79124c6df55bddd7e9a11a7e9104d4 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 16:28:01 +0100 Subject: translate: Fix a call to indexed SSE run. --- src/gallium/auxiliary/translate/translate_sse.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index ddfa4c6b4a..ffc5fe9e81 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -55,6 +55,7 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate, typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ); struct translate_buffer { @@ -693,6 +694,7 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, p->gen_run_elts( translate, elts, count, + 0, output_buffer ); } -- cgit v1.2.3 From 5007e39f76c897b8f3aa4acf6086c8b7ac30bdef Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 18:26:40 +0100 Subject: Implement instanced indexed draw. --- src/gallium/auxiliary/draw/draw_pt_fetch.c | 1 + src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 1 + src/gallium/auxiliary/draw/draw_vs_varient.c | 1 + src/gallium/auxiliary/translate/translate.h | 1 + src/gallium/auxiliary/translate/translate_generic.c | 14 +++++++++++--- src/gallium/auxiliary/translate/translate_sse.c | 20 +++++++++++--------- 6 files changed, 26 insertions(+), 12 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index e8174a2971..f88a839f61 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -160,6 +160,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, translate->run_elts( translate, elts, count, + draw->instance_id, verts ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 40bfc0fbb2..771d94b973 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -257,6 +257,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, feme->translate->run_elts( feme->translate, fetch_elts, fetch_count, + draw->instance_id, hw_verts ); if (0) { diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 4cc080f803..8e14bdd6bd 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -142,6 +142,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->fetch->run_elts( vsvg->fetch, elts, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index fb298471b8..9ae7a482a0 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -75,6 +75,7 @@ struct translate { void (PIPE_CDECL *run_elts)( struct translate *, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer); void (PIPE_CDECL *run)( struct translate *, diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 0fa9927409..742f03b503 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -569,6 +569,7 @@ static emit_func get_emit_func( enum pipe_format format ) static void PIPE_CDECL generic_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -584,13 +585,20 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - - const char *src = (tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt); + const char *src; char *dst = (vert + tg->attrib[attr].output_offset); + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + tg->attrib[attr].fetch( src, data ); if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index ffc5fe9e81..ba4a246fdb 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -376,13 +376,14 @@ static boolean init_inputs( struct translate_sse *p, boolean linear ) { unsigned i; - if (linear) { - struct x86_reg instance_id = x86_make_disp(p->machine_EDX, - get_offset(p, &p->instance_id)); + struct x86_reg instance_id = x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id)); - for (i = 0; i < p->nr_buffer_varients; i++) { - struct translate_buffer_varient *varient = &p->buffer_varient[i]; - struct translate_buffer *buffer = &p->buffer[varient->buffer_index]; + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; + struct translate_buffer *buffer = &p->buffer[varient->buffer_index]; + + if (linear || varient->instance_divisor) { struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, get_offset(p, &buffer->stride)); struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, @@ -426,7 +427,7 @@ static boolean init_inputs( struct translate_sse *p, /* In the linear case, keep the buffer pointer instead of the * index number. */ - if (p->nr_buffer_varients == 1) + if (linear && p->nr_buffer_varients == 1) x86_mov(p->func, elt, tmp_EAX); else x86_mov(p->func, buf_ptr, tmp_EAX); @@ -445,7 +446,7 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, if (linear && p->nr_buffer_varients == 1) { return p->idx_EBX; } - else if (linear) { + else if (linear || p->buffer_varient[var_idx].instance_divisor) { struct x86_reg ptr = p->tmp_EAX; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, @@ -687,6 +688,7 @@ static void translate_sse_release( struct translate *translate ) static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; @@ -694,7 +696,7 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, p->gen_run_elts( translate, elts, count, - 0, + instance_id, output_buffer ); } -- cgit v1.2.3 From bccdb239c700e0c7c90d27a281d5246b958581b5 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 18:27:58 +0100 Subject: sp: Implement draw_elements_instanced(). --- src/gallium/drivers/softpipe/sp_context.c | 1 + src/gallium/drivers/softpipe/sp_draw_arrays.c | 46 +++++++++++++++++++++++++-- src/gallium/drivers/softpipe/sp_state.h | 10 ++++++ 3 files changed, 55 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 406414ae3d..969d69d6b4 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -239,6 +239,7 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; + softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 6a593fb06a..debf5bfe06 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -192,6 +192,26 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned count, unsigned startInstance, unsigned instanceCount) +{ + return softpipe_draw_elements_instanced(pipe, + NULL, + 0, + mode, + start, + count, + startInstance, + instanceCount); +} + +boolean +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) { struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; @@ -216,8 +236,26 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, buf); } - draw_set_mapped_element_buffer_range(draw, 0, start, - start + count - 1, NULL); + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes; + + mapped_indexes = pipe_buffer_map(pipe->screen, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, + indexSize, + 0, + 0xffffffff, + mapped_indexes); + } else { + /* no index/element buffer */ + draw_set_mapped_element_buffer_range(draw, + 0, + start, + start + count - 1, + NULL); + } /* draw! */ draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); @@ -227,6 +265,10 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, NULL); pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe_buffer_unmap(pipe->screen, indexBuffer); + } /* Note: leave drawing surfaces mapped */ softpipe_unmap_constant_buffers(sp); diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 13935fd799..00da41b985 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -197,6 +197,16 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount); +boolean +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + void softpipe_map_transfers(struct softpipe_context *sp); -- cgit v1.2.3 From 230355648b647b32161124cad23be553a2c6d196 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 18:41:05 +0100 Subject: sp: Generalise drawing code to remove dupes. Also, avoid nested draw calls from simpler versions. --- src/gallium/drivers/softpipe/sp_draw_arrays.c | 165 +++++++++++++------------- 1 file changed, 84 insertions(+), 81 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index debf5bfe06..14cb1322e1 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -88,19 +88,41 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) } +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + */ +static boolean +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); + return softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } -/** - * Draw vertex arrays, with optional indexing. - * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - */ boolean softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, @@ -109,67 +131,16 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { - struct softpipe_context *sp = softpipe_context(pipe); - struct draw_context *draw = sp->draw; - unsigned i; - - sp->reduced_api_prim = u_reduced_prim(mode); - - if (sp->dirty) - softpipe_update_derived( sp ); - - softpipe_map_transfers(sp); - softpipe_map_constant_buffers(sp); - - /* - * Map vertex buffers - */ - for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf - = pipe_buffer_map(pipe->screen, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(draw, i, buf); - } - - /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes - = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(draw, indexSize, - min_index, - max_index, - mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, start, - start + count - 1, NULL); - } - - /* draw! */ - draw_arrays(draw, mode, start, count); - - /* - * unmap vertex/index buffers - will cause draw module to flush - */ - for (i = 0; i < sp->num_vertex_buffers; i++) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); - } - if (indexBuffer) { - draw_set_mapped_element_buffer(draw, 0, NULL); - pipe_buffer_unmap(pipe->screen, indexBuffer); - } - - - /* Note: leave drawing surfaces mapped */ - softpipe_unmap_constant_buffers(sp); - - sp->dirty_render_cache = TRUE; - - return TRUE; + return softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + min_index, + max_index, + mode, + start, + count, + 0, + 1); } @@ -179,10 +150,16 @@ softpipe_draw_elements(struct pipe_context *pipe, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + return softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } boolean @@ -193,14 +170,16 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount) { - return softpipe_draw_elements_instanced(pipe, - NULL, - 0, - mode, - start, - count, - startInstance, - instanceCount); + return softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); } boolean @@ -212,6 +191,30 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, unsigned count, unsigned startInstance, unsigned instanceCount) +{ + return softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +static boolean +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) { struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; @@ -245,8 +248,8 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(draw, indexSize, - 0, - 0xffffffff, + minIndex, + maxIndex, mapped_indexes); } else { /* no index/element buffer */ -- cgit v1.2.3 From 6481f40eec0a6c5dea4a74a11b83415478f2c814 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 18:54:04 +0100 Subject: draw: Implement draw_arrays() in terms of draw_arrays_instanced(). --- src/gallium/auxiliary/draw/draw_pt.c | 44 +++++++++++++++--------------------- 1 file changed, 18 insertions(+), 26 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 1217b9e5d7..a5ddec5286 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -280,20 +280,33 @@ void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - unsigned reduced_prim = u_reduced_prim(prim); + draw_arrays_instanced(draw, prim, start, count, 0, 1); +} + +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + unsigned reduced_prim = u_reduced_prim(mode); + unsigned instance; + if (reduced_prim != draw->reduced_prim) { - draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); draw->reduced_prim = reduced_prim; } if (0) - draw_print_arrays(draw, prim, start, MIN2(count, 20)); + draw_print_arrays(draw, mode, start, MIN2(count, 20)); #if 0 { int i; - debug_printf("draw_arrays(prim=%u start=%u count=%u):\n", - prim, start, count); + debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", + mode, start, count); tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { @@ -311,27 +324,6 @@ draw_arrays(struct draw_context *draw, unsigned prim, } #endif - /* drawing done here: */ - draw->instance_id = 0; - draw_pt_arrays(draw, prim, start, count); -} - -void -draw_arrays_instanced(struct draw_context *draw, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - unsigned reduced_prim = u_reduced_prim(mode); - unsigned instance; - - if (reduced_prim != draw->reduced_prim) { - draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); - draw->reduced_prim = reduced_prim; - } - for (instance = 0; instance < instanceCount; instance++) { draw->instance_id = instance + startInstance; draw_pt_arrays(draw, mode, start, count); -- cgit v1.2.3 From 5754185d39c147cd81b97fd331ccf78aebfad5e7 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 21:52:01 +0100 Subject: tgsi: Support system values in ureg. --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 36 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_ureg.h | 6 ++++++ 2 files changed, 42 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 1e730e5342..2713372b05 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -64,6 +64,7 @@ struct ureg_tokens { }; #define UREG_MAX_INPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 32 @@ -94,6 +95,13 @@ struct ureg_program } gs_input[UREG_MAX_INPUT]; unsigned nr_gs_inputs; + struct { + unsigned index; + unsigned semantic_name; + unsigned semantic_index; + } system_value[UREG_MAX_SYSTEM_VALUE]; + unsigned nr_system_values; + struct { unsigned semantic_name; unsigned semantic_index; @@ -299,6 +307,25 @@ ureg_DECL_gs_input(struct ureg_program *ureg, } +struct ureg_src +ureg_DECL_system_value(struct ureg_program *ureg, + unsigned index, + unsigned semantic_name, + unsigned semantic_index) +{ + if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { + ureg->system_value[ureg->nr_system_values].index = index; + ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; + ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; + ureg->nr_system_values++; + } else { + set_bad(ureg); + } + + return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); +} + + struct ureg_dst ureg_DECL_output( struct ureg_program *ureg, unsigned name, @@ -1003,6 +1030,15 @@ static void emit_decls( struct ureg_program *ureg ) } } + for (i = 0; i < ureg->nr_system_values; i++) { + emit_decl(ureg, + TGSI_FILE_SYSTEM_VALUE, + ureg->system_value[i].index, + ureg->system_value[i].semantic_name, + ureg->system_value[i].semantic_index, + TGSI_INTERPOLATE_CONSTANT); + } + for (i = 0; i < ureg->nr_outputs; i++) { emit_decl( ureg, TGSI_FILE_OUTPUT, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 7e3e7bcf1d..179862d4e2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -137,6 +137,12 @@ struct ureg_src ureg_DECL_gs_input(struct ureg_program *, unsigned index); +struct ureg_src +ureg_DECL_system_value(struct ureg_program *, + unsigned index, + unsigned semantic_name, + unsigned semantic_index); + struct ureg_dst ureg_DECL_output( struct ureg_program *, unsigned semantic_name, -- cgit v1.2.3 From 543b9566bdaa48fea2df1866fa1310c1cdbcde27 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 22:18:53 +0100 Subject: Add lame support for instanceID to draw module. It's all screaming for integer support -- fake it with float for now. --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 +- src/gallium/auxiliary/draw/draw_pt.h | 3 +- src/gallium/auxiliary/draw/draw_pt_emit.c | 3 +- src/gallium/auxiliary/draw/draw_pt_fetch.c | 32 +++++++--- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 1 + .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 14 ++++- src/gallium/auxiliary/draw/draw_vs_varient.c | 3 + src/gallium/auxiliary/translate/translate.h | 6 ++ .../auxiliary/translate/translate_generic.c | 26 +++++--- src/gallium/auxiliary/translate/translate_sse.c | 70 +++++++++++++++------- src/gallium/drivers/svga/svga_state_vs.c | 1 + 11 files changed, 118 insertions(+), 44 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index bb8a8ff491..d40c035240 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -271,7 +271,8 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 20edf7a227..d5e0d92a60 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -183,7 +183,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); struct pt_fetch; void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vertex_input_count, - unsigned vertex_size ); + unsigned vertex_size, + unsigned instance_id_index ); void draw_pt_fetch_run( struct pt_fetch *fetch, const unsigned *elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d0abeb9336..4fb53276bb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -121,7 +121,8 @@ void draw_pt_emit_prepare( struct pt_emit *emit, emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index f88a839f61..36c27e22ff 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -58,12 +58,14 @@ struct pt_fetch { */ void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vs_input_count, - unsigned vertex_size ) + unsigned vertex_size, + unsigned instance_id_index ) { struct draw_context *draw = fetch->draw; unsigned nr_inputs; - unsigned i, nr = 0; + unsigned i, nr = 0, ei = 0; unsigned dst_offset = 0; + unsigned num_extra_inputs = 0; struct translate_key key; fetch->vertex_size = vertex_size; @@ -78,6 +80,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, { /* Need to set header->vertex_id = 0xffff somehow. */ + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; key.element[nr].input_offset = 0; @@ -92,16 +95,27 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, */ dst_offset += 4 * sizeof(float); } - - assert( draw->pt.nr_vertex_elements >= vs_input_count ); - nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements ); + if (instance_id_index != ~0) { + num_extra_inputs++; + } + + assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count); + + nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs); for (i = 0; i < nr_inputs; i++) { - key.element[nr].input_format = draw->pt.vertex_element[i].src_format; - key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; - key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; - key.element[nr].instance_divisor = draw->pt.vertex_element[i].instance_divisor; + if (i == instance_id_index) { + key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID; + key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; /* XXX: Make it UINT. */ + } else { + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; + key.element[nr].input_format = draw->pt.vertex_element[ei].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index; + key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset; + key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor; + ei++; + } key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[nr].output_offset = dst_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 771d94b973..2a604470e9 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -166,6 +166,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, continue; } + key.element[i].type = TRANSLATE_ELEMENT_NORMAL; key.element[i].input_format = input_format; key.element[i].input_buffer = input_buffer; key.element[i].input_offset = input_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 932113783d..0238f2e234 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -58,6 +58,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *vs = draw->vs.vertex_shader; + unsigned i; + boolean instance_id_index = ~0; /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. @@ -65,6 +67,15 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned nr = MAX2( vs->info.num_inputs, vs->info.num_outputs + 1 ); + /* Scan for instanceID system value. + */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + fpme->prim = prim; fpme->opt = opt; @@ -78,7 +89,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, draw_pt_fetch_prepare( fpme->fetch, vs->info.num_inputs, - fpme->vertex_size ); + fpme->vertex_size, + instance_id_index ); /* XXX: it's not really gl rasterization rules we care about here, * but gl vs dx9 clip spaces. */ diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 8e14bdd6bd..60b7a3ea36 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -284,6 +284,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, fetch.nr_elements = key->nr_inputs; fetch.output_stride = vsvg->temp_vertex_stride; for (i = 0; i < key->nr_inputs; i++) { + fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL; fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; @@ -299,6 +300,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, for (i = 0; i < key->nr_outputs; i++) { if (key->element[i].out.format != EMIT_1F_PSIZE) { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit.element[i].input_buffer = 0; emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); @@ -308,6 +310,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, assert(emit.element[i].input_offset <= fetch.output_stride); } else { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].input_buffer = 1; emit.element[i].input_offset = 0; diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 9ae7a482a0..54ed2c1a4b 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -44,8 +44,14 @@ #include "pipe/p_format.h" #include "pipe/p_state.h" +enum translate_element_type { + TRANSLATE_ELEMENT_NORMAL, + TRANSLATE_ELEMENT_INSTANCE_ID +}; + struct translate_element { + enum translate_element_type type; enum pipe_format input_format; enum pipe_format output_format; unsigned input_buffer:8; diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 742f03b503..24727d4988 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -46,6 +46,8 @@ struct translate_generic { struct translate translate; struct { + enum translate_element_type type; + fetch_func fetch; unsigned buffer; unsigned input_offset; @@ -632,22 +634,27 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const char *src; char *dst = (vert + tg->attrib[attr].output_offset); - if (tg->attrib[attr].instance_divisor) { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * - (instance_id / tg->attrib[attr].instance_divisor); + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const char *src; + + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + + tg->attrib[attr].fetch( src, data ); } else { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt; + data[0] = (float)instance_id; } - tg->attrib[attr].fetch( src, data ); - if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", i, attr, data[0], data[1], data[2], data[3]); @@ -700,6 +707,7 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->translate.run = generic_run; for (i = 0; i < key->nr_elements; i++) { + tg->attrib[i].type = key->element[i].type; tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); tg->attrib[i].buffer = key->element[i].input_buffer; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index ba4a246fdb..8e152a002a 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -50,13 +50,15 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate, unsigned start, unsigned count, unsigned instance_id, - void *output_buffer ); + void *output_buffer, + float instance_id_float ); typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, const unsigned *elts, unsigned count, unsigned instance_id, - void *output_buffer ); + void *output_buffer, + float instance_id_float ); struct translate_buffer { const void *base_ptr; @@ -70,6 +72,9 @@ struct translate_buffer_varient { }; +#define ELEMENT_BUFFER_INSTANCE_ID 1001 + + struct translate_sse { struct translate translate; @@ -97,6 +102,7 @@ struct translate_sse { boolean use_instancing; unsigned instance_id; + float instance_id_float; /* XXX: needed while no integer support in TGSI */ run_func gen_run; run_elts_func gen_run_elts; @@ -443,6 +449,10 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, unsigned var_idx, struct x86_reg elt ) { + if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { + return x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id_float)); + } if (linear && p->nr_buffer_varients == 1) { return p->idx_EBX; } @@ -577,6 +587,14 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_mov(p->func, x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), p->tmp_EAX); + + /* XXX: temporary */ + x86_mov(p->func, + p->tmp_EAX, + x86_fn_arg(p->func, 6)); + x86_mov(p->func, + x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id_float)), + p->tmp_EAX); } /* Get vertex count, compare to zero @@ -697,7 +715,8 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, elts, count, instance_id, - output_buffer ); + output_buffer, + (float)instance_id ); } static void PIPE_CDECL translate_sse_run( struct translate *translate, @@ -712,7 +731,8 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate, start, count, instance_id, - output_buffer ); + output_buffer, + (float)instance_id); } @@ -735,29 +755,35 @@ struct translate *translate_sse2_create( const struct translate_key *key ) p->translate.run = translate_sse_run; for (i = 0; i < key->nr_elements; i++) { - unsigned j; + if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) { + unsigned j; - p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 ); + p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1); - if (key->element[i].instance_divisor) { - p->use_instancing = TRUE; - } + if (key->element[i].instance_divisor) { + p->use_instancing = TRUE; + } - /* - * Map vertex element to vertex buffer varient. - */ - for (j = 0; j < p->nr_buffer_varients; j++) { - if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && - p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { - break; + /* + * Map vertex element to vertex buffer varient. + */ + for (j = 0; j < p->nr_buffer_varients; j++) { + if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && + p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { + break; + } } + if (j == p->nr_buffer_varients) { + p->buffer_varient[j].buffer_index = key->element[i].input_buffer; + p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; + p->nr_buffer_varients++; + } + p->element_to_buffer_varient[i] = j; + } else { + assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID); + + p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID; } - if (j == p->nr_buffer_varients) { - p->buffer_varient[j].buffer_index = key->element[i].input_buffer; - p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; - p->nr_buffer_varients++; - } - p->element_to_buffer_varient[i] = j; } if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 114de1a49e..82e7874e2a 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -194,6 +194,7 @@ static int update_zero_stride( struct svga_context *svga, key.output_stride = 4 * sizeof(float); key.nr_elements = 1; + key.element[0].type = TRANSLATE_ELEMENT_NORMAL; key.element[0].input_format = vel->src_format; key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[0].input_buffer = vel->vertex_buffer_index; -- cgit v1.2.3 From 76ff89d2de3807f316954aa97ffdbb51bb3b6966 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 5 Jan 2010 11:48:32 +0100 Subject: gallium: Handle InstanceID as a true 32-bit uint. --- src/gallium/auxiliary/draw/draw_pt_fetch.c | 13 +++++++++---- src/gallium/auxiliary/translate/translate_sse.c | 23 +++++------------------ 2 files changed, 14 insertions(+), 22 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 36c27e22ff..55e7a7b81a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -107,19 +107,24 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, for (i = 0; i < nr_inputs; i++) { if (i == instance_id_index) { key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID; - key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; /* XXX: Make it UINT. */ + key.element[nr].input_format = PIPE_FORMAT_R32_USCALED; + key.element[nr].output_format = PIPE_FORMAT_R32_USCALED; + key.element[nr].output_offset = dst_offset; + + dst_offset += sizeof(uint); } else { key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; key.element[nr].input_format = draw->pt.vertex_element[ei].src_format; key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index; key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset; key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor; + key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[nr].output_offset = dst_offset; + ei++; + dst_offset += 4 * sizeof(float); } - key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - key.element[nr].output_offset = dst_offset; - dst_offset += 4 * sizeof(float); nr++; } diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 8e152a002a..c13e742738 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -50,15 +50,13 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate, unsigned start, unsigned count, unsigned instance_id, - void *output_buffer, - float instance_id_float ); + void *output_buffer); typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, const unsigned *elts, unsigned count, unsigned instance_id, - void *output_buffer, - float instance_id_float ); + void *output_buffer); struct translate_buffer { const void *base_ptr; @@ -102,7 +100,6 @@ struct translate_sse { boolean use_instancing; unsigned instance_id; - float instance_id_float; /* XXX: needed while no integer support in TGSI */ run_func gen_run; run_elts_func gen_run_elts; @@ -451,7 +448,7 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, { if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { return x86_make_disp(p->machine_EDX, - get_offset(p, &p->instance_id_float)); + get_offset(p, &p->instance_id)); } if (linear && p->nr_buffer_varients == 1) { return p->idx_EBX; @@ -587,14 +584,6 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_mov(p->func, x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), p->tmp_EAX); - - /* XXX: temporary */ - x86_mov(p->func, - p->tmp_EAX, - x86_fn_arg(p->func, 6)); - x86_mov(p->func, - x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id_float)), - p->tmp_EAX); } /* Get vertex count, compare to zero @@ -715,8 +704,7 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, elts, count, instance_id, - output_buffer, - (float)instance_id ); + output_buffer); } static void PIPE_CDECL translate_sse_run( struct translate *translate, @@ -731,8 +719,7 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate, start, count, instance_id, - output_buffer, - (float)instance_id); + output_buffer); } -- cgit v1.2.3 From 8c53a2576ec998936981c354b02979f803c0e4de Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 12 Jan 2010 18:51:27 +0100 Subject: gallium: draw_arrays/elements_instanced() are of type void. --- src/gallium/drivers/softpipe/sp_draw_arrays.c | 44 +++++++++++++-------------- src/gallium/drivers/softpipe/sp_state.h | 4 +-- src/gallium/include/pipe/p_context.h | 30 +++++++++--------- 3 files changed, 39 insertions(+), 39 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 87312ae151..b3ece9d8ed 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -172,7 +172,7 @@ softpipe_draw_elements(struct pipe_context *pipe, 1); } -boolean +void softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned mode, unsigned start, @@ -180,19 +180,19 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount) { - return softpipe_draw_range_elements_instanced(pipe, - NULL, - 0, - 0, - 0xffffffff, - mode, - start, - count, - startInstance, - instanceCount); + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); } -boolean +void softpipe_draw_elements_instanced(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -202,16 +202,16 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount) { - return softpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, - 0, - 0xffffffff, - mode, - start, - count, - startInstance, - instanceCount); + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); } static boolean diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index f8886565e9..3153d6e6a4 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -199,7 +199,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count); -boolean +void softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned mode, unsigned start, @@ -207,7 +207,7 @@ softpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount); -boolean +void softpipe_draw_elements_instanced(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 6394e095d3..f2242d2069 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -69,21 +69,21 @@ struct pipe_context { unsigned indexSize, unsigned mode, unsigned start, unsigned count); - boolean (*draw_arrays_instanced)(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - - boolean (*draw_elements_instanced)(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); + void (*draw_arrays_instanced)(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + void (*draw_elements_instanced)(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); /* XXX: this is (probably) a temporary entrypoint, as the range * information should be available from the vertex_buffer state. -- cgit v1.2.3 From ea0cc47a4f769e19ceadb9704669244f5ba53871 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Jan 2010 11:13:00 +0100 Subject: svga: Fix call to translate::run(). --- src/gallium/drivers/svga/svga_state_vs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index c614281858..2313eafc37 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -227,7 +227,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, vbuffer->stride); - translate->run(translate, 0, 1, + translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); pipe_buffer_unmap(svga->pipe.screen, -- cgit v1.2.3