From 7ca0ce38340144794267609646048b3820d594ab Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 29 Dec 2009 23:21:01 +0100 Subject: Implement draw_arrays_instanced() in softpipe. Modify the translate module to respect instance divisors and accept instance id as a parameter to calculate input vertex offset. --- src/gallium/auxiliary/draw/draw_pt_emit.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary/draw/draw_pt_emit.c') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 064e16c295..d0abeb9336 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -125,6 +125,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; @@ -204,6 +205,7 @@ void draw_pt_emit( struct pt_emit *emit, translate->run( translate, 0, vertex_count, + draw->instance_id, hw_verts ); render->unmap_vertices( render, @@ -263,6 +265,7 @@ void draw_pt_emit_linear(struct pt_emit *emit, translate->run(translate, 0, count, + draw->instance_id, hw_verts); if (0) { -- cgit v1.2.3 From 543b9566bdaa48fea2df1866fa1310c1cdbcde27 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 30 Dec 2009 22:18:53 +0100 Subject: Add lame support for instanceID to draw module. It's all screaming for integer support -- fake it with float for now. --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 +- src/gallium/auxiliary/draw/draw_pt.h | 3 +- src/gallium/auxiliary/draw/draw_pt_emit.c | 3 +- src/gallium/auxiliary/draw/draw_pt_fetch.c | 32 +++++++--- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 1 + .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 14 ++++- src/gallium/auxiliary/draw/draw_vs_varient.c | 3 + src/gallium/auxiliary/translate/translate.h | 6 ++ .../auxiliary/translate/translate_generic.c | 26 +++++--- src/gallium/auxiliary/translate/translate_sse.c | 70 +++++++++++++++------- src/gallium/drivers/svga/svga_state_vs.c | 1 + 11 files changed, 118 insertions(+), 44 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_pt_emit.c') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index bb8a8ff491..d40c035240 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -271,7 +271,8 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 20edf7a227..d5e0d92a60 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -183,7 +183,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); struct pt_fetch; void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vertex_input_count, - unsigned vertex_size ); + unsigned vertex_size, + unsigned instance_id_index ); void draw_pt_fetch_run( struct pt_fetch *fetch, const unsigned *elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d0abeb9336..4fb53276bb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -121,7 +121,8 @@ void draw_pt_emit_prepare( struct pt_emit *emit, emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index f88a839f61..36c27e22ff 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -58,12 +58,14 @@ struct pt_fetch { */ void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vs_input_count, - unsigned vertex_size ) + unsigned vertex_size, + unsigned instance_id_index ) { struct draw_context *draw = fetch->draw; unsigned nr_inputs; - unsigned i, nr = 0; + unsigned i, nr = 0, ei = 0; unsigned dst_offset = 0; + unsigned num_extra_inputs = 0; struct translate_key key; fetch->vertex_size = vertex_size; @@ -78,6 +80,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, { /* Need to set header->vertex_id = 0xffff somehow. */ + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; key.element[nr].input_offset = 0; @@ -92,16 +95,27 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, */ dst_offset += 4 * sizeof(float); } - - assert( draw->pt.nr_vertex_elements >= vs_input_count ); - nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements ); + if (instance_id_index != ~0) { + num_extra_inputs++; + } + + assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count); + + nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs); for (i = 0; i < nr_inputs; i++) { - key.element[nr].input_format = draw->pt.vertex_element[i].src_format; - key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; - key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; - key.element[nr].instance_divisor = draw->pt.vertex_element[i].instance_divisor; + if (i == instance_id_index) { + key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID; + key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; /* XXX: Make it UINT. */ + } else { + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; + key.element[nr].input_format = draw->pt.vertex_element[ei].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index; + key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset; + key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor; + ei++; + } key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[nr].output_offset = dst_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 771d94b973..2a604470e9 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -166,6 +166,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, continue; } + key.element[i].type = TRANSLATE_ELEMENT_NORMAL; key.element[i].input_format = input_format; key.element[i].input_buffer = input_buffer; key.element[i].input_offset = input_offset; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 932113783d..0238f2e234 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -58,6 +58,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *vs = draw->vs.vertex_shader; + unsigned i; + boolean instance_id_index = ~0; /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. @@ -65,6 +67,15 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned nr = MAX2( vs->info.num_inputs, vs->info.num_outputs + 1 ); + /* Scan for instanceID system value. + */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + fpme->prim = prim; fpme->opt = opt; @@ -78,7 +89,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, draw_pt_fetch_prepare( fpme->fetch, vs->info.num_inputs, - fpme->vertex_size ); + fpme->vertex_size, + instance_id_index ); /* XXX: it's not really gl rasterization rules we care about here, * but gl vs dx9 clip spaces. */ diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 8e14bdd6bd..60b7a3ea36 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -284,6 +284,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, fetch.nr_elements = key->nr_inputs; fetch.output_stride = vsvg->temp_vertex_stride; for (i = 0; i < key->nr_inputs; i++) { + fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL; fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; @@ -299,6 +300,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, for (i = 0; i < key->nr_outputs; i++) { if (key->element[i].out.format != EMIT_1F_PSIZE) { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit.element[i].input_buffer = 0; emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); @@ -308,6 +310,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, assert(emit.element[i].input_offset <= fetch.output_stride); } else { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].input_buffer = 1; emit.element[i].input_offset = 0; diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 9ae7a482a0..54ed2c1a4b 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -44,8 +44,14 @@ #include "pipe/p_format.h" #include "pipe/p_state.h" +enum translate_element_type { + TRANSLATE_ELEMENT_NORMAL, + TRANSLATE_ELEMENT_INSTANCE_ID +}; + struct translate_element { + enum translate_element_type type; enum pipe_format input_format; enum pipe_format output_format; unsigned input_buffer:8; diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 742f03b503..24727d4988 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -46,6 +46,8 @@ struct translate_generic { struct translate translate; struct { + enum translate_element_type type; + fetch_func fetch; unsigned buffer; unsigned input_offset; @@ -632,22 +634,27 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const char *src; char *dst = (vert + tg->attrib[attr].output_offset); - if (tg->attrib[attr].instance_divisor) { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * - (instance_id / tg->attrib[attr].instance_divisor); + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const char *src; + + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + + tg->attrib[attr].fetch( src, data ); } else { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt; + data[0] = (float)instance_id; } - tg->attrib[attr].fetch( src, data ); - if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", i, attr, data[0], data[1], data[2], data[3]); @@ -700,6 +707,7 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->translate.run = generic_run; for (i = 0; i < key->nr_elements; i++) { + tg->attrib[i].type = key->element[i].type; tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); tg->attrib[i].buffer = key->element[i].input_buffer; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index ba4a246fdb..8e152a002a 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -50,13 +50,15 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate, unsigned start, unsigned count, unsigned instance_id, - void *output_buffer ); + void *output_buffer, + float instance_id_float ); typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, const unsigned *elts, unsigned count, unsigned instance_id, - void *output_buffer ); + void *output_buffer, + float instance_id_float ); struct translate_buffer { const void *base_ptr; @@ -70,6 +72,9 @@ struct translate_buffer_varient { }; +#define ELEMENT_BUFFER_INSTANCE_ID 1001 + + struct translate_sse { struct translate translate; @@ -97,6 +102,7 @@ struct translate_sse { boolean use_instancing; unsigned instance_id; + float instance_id_float; /* XXX: needed while no integer support in TGSI */ run_func gen_run; run_elts_func gen_run_elts; @@ -443,6 +449,10 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, unsigned var_idx, struct x86_reg elt ) { + if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { + return x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id_float)); + } if (linear && p->nr_buffer_varients == 1) { return p->idx_EBX; } @@ -577,6 +587,14 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_mov(p->func, x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), p->tmp_EAX); + + /* XXX: temporary */ + x86_mov(p->func, + p->tmp_EAX, + x86_fn_arg(p->func, 6)); + x86_mov(p->func, + x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id_float)), + p->tmp_EAX); } /* Get vertex count, compare to zero @@ -697,7 +715,8 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, elts, count, instance_id, - output_buffer ); + output_buffer, + (float)instance_id ); } static void PIPE_CDECL translate_sse_run( struct translate *translate, @@ -712,7 +731,8 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate, start, count, instance_id, - output_buffer ); + output_buffer, + (float)instance_id); } @@ -735,29 +755,35 @@ struct translate *translate_sse2_create( const struct translate_key *key ) p->translate.run = translate_sse_run; for (i = 0; i < key->nr_elements; i++) { - unsigned j; + if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) { + unsigned j; - p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 ); + p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1); - if (key->element[i].instance_divisor) { - p->use_instancing = TRUE; - } + if (key->element[i].instance_divisor) { + p->use_instancing = TRUE; + } - /* - * Map vertex element to vertex buffer varient. - */ - for (j = 0; j < p->nr_buffer_varients; j++) { - if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && - p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { - break; + /* + * Map vertex element to vertex buffer varient. + */ + for (j = 0; j < p->nr_buffer_varients; j++) { + if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && + p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { + break; + } } + if (j == p->nr_buffer_varients) { + p->buffer_varient[j].buffer_index = key->element[i].input_buffer; + p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; + p->nr_buffer_varients++; + } + p->element_to_buffer_varient[i] = j; + } else { + assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID); + + p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID; } - if (j == p->nr_buffer_varients) { - p->buffer_varient[j].buffer_index = key->element[i].input_buffer; - p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; - p->nr_buffer_varients++; - } - p->element_to_buffer_varient[i] = j; } if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 114de1a49e..82e7874e2a 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -194,6 +194,7 @@ static int update_zero_stride( struct svga_context *svga, key.output_stride = 4 * sizeof(float); key.nr_elements = 1; + key.element[0].type = TRANSLATE_ELEMENT_NORMAL; key.element[0].input_format = vel->src_format; key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[0].input_buffer = vel->vertex_buffer_index; -- cgit v1.2.3