summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h8
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c29
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c34
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c14
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h1
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c10
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c7
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c36
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h6
-rw-r--r--src/gallium/auxiliary/translate/translate.h18
-rw-r--r--src/gallium/auxiliary/translate/translate_generic.c40
-rw-r--r--src/gallium/auxiliary/translate/translate_sse.c199
19 files changed, 357 insertions, 71 deletions
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index b716209df2..8a64c06efc 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -164,6 +164,14 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw,
void draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count);
+void
+draw_arrays_instanced(struct draw_context *draw,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
+
void draw_flush(struct draw_context *draw);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 1a5269c0de..d40c035240 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf,
/* Note: we really do want data[0] here, not data[pos]:
*/
vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0);
- vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr);
+ vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr);
if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
@@ -271,10 +271,12 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
emit_sz = 0;
break;
}
-
+
+ hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
hw_key.element[i].input_buffer = src_buffer;
hw_key.element[i].input_offset = src_offset;
+ hw_key.element[i].instance_divisor = 0;
hw_key.element[i].output_format = output_format;
hw_key.element[i].output_offset = dst_offset;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index e49041556b..2a11b8c545 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -239,6 +239,8 @@ struct draw_context
unsigned reduced_prim;
+ unsigned instance_id;
+
void *driver_private;
};
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 2801dbafe4..a5ddec5286 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -280,20 +280,33 @@ void
draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count)
{
- unsigned reduced_prim = u_reduced_prim(prim);
+ draw_arrays_instanced(draw, prim, start, count, 0, 1);
+}
+
+void
+draw_arrays_instanced(struct draw_context *draw,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
+ unsigned reduced_prim = u_reduced_prim(mode);
+ unsigned instance;
+
if (reduced_prim != draw->reduced_prim) {
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
draw->reduced_prim = reduced_prim;
}
if (0)
- draw_print_arrays(draw, prim, start, MIN2(count, 20));
+ draw_print_arrays(draw, mode, start, MIN2(count, 20));
#if 0
{
int i;
- debug_printf("draw_arrays(prim=%u start=%u count=%u):\n",
- prim, start, count);
+ debug_printf("draw_arrays(mode=%u start=%u count=%u):\n",
+ mode, start, count);
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
debug_printf("Elements:\n");
for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
@@ -311,6 +324,8 @@ draw_arrays(struct draw_context *draw, unsigned prim,
}
#endif
- /* drawing done here: */
- draw_pt_arrays(draw, prim, start, count);
+ for (instance = 0; instance < instanceCount; instance++) {
+ draw->instance_id = instance + startInstance;
+ draw_pt_arrays(draw, mode, start, count);
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 20edf7a227..d5e0d92a60 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -183,7 +183,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
struct pt_fetch;
void draw_pt_fetch_prepare( struct pt_fetch *fetch,
unsigned vertex_input_count,
- unsigned vertex_size );
+ unsigned vertex_size,
+ unsigned instance_id_index );
void draw_pt_fetch_run( struct pt_fetch *fetch,
const unsigned *elts,
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 064e16c295..4fb53276bb 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -121,10 +121,12 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
emit_sz = 0;
break;
}
-
+
+ hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
hw_key.element[i].input_buffer = src_buffer;
hw_key.element[i].input_offset = src_offset;
+ hw_key.element[i].instance_divisor = 0;
hw_key.element[i].output_format = output_format;
hw_key.element[i].output_offset = dst_offset;
@@ -204,6 +206,7 @@ void draw_pt_emit( struct pt_emit *emit,
translate->run( translate,
0,
vertex_count,
+ draw->instance_id,
hw_verts );
render->unmap_vertices( render,
@@ -263,6 +266,7 @@ void draw_pt_emit_linear(struct pt_emit *emit,
translate->run(translate,
0,
count,
+ draw->instance_id,
hw_verts);
if (0) {
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 305bfef435..36c27e22ff 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -58,12 +58,14 @@ struct pt_fetch {
*/
void draw_pt_fetch_prepare( struct pt_fetch *fetch,
unsigned vs_input_count,
- unsigned vertex_size )
+ unsigned vertex_size,
+ unsigned instance_id_index )
{
struct draw_context *draw = fetch->draw;
unsigned nr_inputs;
- unsigned i, nr = 0;
+ unsigned i, nr = 0, ei = 0;
unsigned dst_offset = 0;
+ unsigned num_extra_inputs = 0;
struct translate_key key;
fetch->vertex_size = vertex_size;
@@ -78,9 +80,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
{
/* Need to set header->vertex_id = 0xffff somehow.
*/
+ key.element[nr].type = TRANSLATE_ELEMENT_NORMAL;
key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
key.element[nr].input_offset = 0;
+ key.element[nr].instance_divisor = 0;
key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
key.element[nr].output_offset = dst_offset;
dst_offset += 1 * sizeof(float);
@@ -91,15 +95,27 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
*/
dst_offset += 4 * sizeof(float);
}
-
- assert( draw->pt.nr_vertex_elements >= vs_input_count );
- nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements );
+ if (instance_id_index != ~0) {
+ num_extra_inputs++;
+ }
+
+ assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count);
+
+ nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs);
for (i = 0; i < nr_inputs; i++) {
- key.element[nr].input_format = draw->pt.vertex_element[i].src_format;
- key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index;
- key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset;
+ if (i == instance_id_index) {
+ key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID;
+ key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; /* XXX: Make it UINT. */
+ } else {
+ key.element[nr].type = TRANSLATE_ELEMENT_NORMAL;
+ key.element[nr].input_format = draw->pt.vertex_element[ei].src_format;
+ key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index;
+ key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset;
+ key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor;
+ ei++;
+ }
key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
key.element[nr].output_offset = dst_offset;
@@ -158,6 +174,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
translate->run_elts( translate,
elts,
count,
+ draw->instance_id,
verts );
}
@@ -183,6 +200,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
translate->run( translate,
start,
count,
+ draw->instance_id,
verts );
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index e7fe6b3b76..2a604470e9 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -166,9 +166,11 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
continue;
}
+ key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
key.element[i].input_format = input_format;
key.element[i].input_buffer = input_buffer;
key.element[i].input_offset = input_offset;
+ key.element[i].instance_divisor = src->instance_divisor;
key.element[i].output_format = output_format;
key.element[i].output_offset = dst_offset;
@@ -256,6 +258,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
feme->translate->run_elts( feme->translate,
fetch_elts,
fetch_count,
+ draw->instance_id,
hw_verts );
if (0) {
@@ -314,6 +317,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
feme->translate->run( feme->translate,
start,
count,
+ draw->instance_id,
hw_verts );
if (0) {
@@ -374,6 +378,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
feme->translate->run( feme->translate,
start,
count,
+ draw->instance_id,
hw_verts );
draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 1a9df4cac5..279f4eec63 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -59,6 +59,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ unsigned i;
+ boolean instance_id_index = ~0;
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
@@ -66,6 +68,15 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
unsigned nr = MAX2( vs->info.num_inputs,
vs->info.num_outputs + 1 );
+ /* Scan for instanceID system value.
+ */
+ for (i = 0; i < vs->info.num_inputs; i++) {
+ if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
+ instance_id_index = i;
+ break;
+ }
+ }
+
fpme->prim = prim;
fpme->opt = opt;
@@ -79,7 +90,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
draw_pt_fetch_prepare( fpme->fetch,
vs->info.num_inputs,
- fpme->vertex_size );
+ fpme->vertex_size,
+ instance_id_index );
/* XXX: it's not really gl rasterization rules we care about here,
* but gl vs dx9 clip spaces.
*/
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index e3b807ebd0..00036cfe68 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -43,6 +43,7 @@ struct draw_varient_input
enum pipe_format format;
unsigned buffer;
unsigned offset;
+ unsigned instance_divisor;
};
struct draw_varient_output
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index d16692584e..9f40030f39 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -142,6 +142,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->fetch->run_elts( vsvg->fetch,
elts,
count,
+ vsvg->draw->instance_id,
temp_buffer );
vsvg->base.vs->run_linear( vsvg->base.vs,
@@ -181,6 +182,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->emit->run( vsvg->emit,
0, count,
+ vsvg->draw->instance_id,
output_buffer );
FREE(temp_buffer);
@@ -203,6 +205,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->fetch->run( vsvg->fetch,
start,
count,
+ vsvg->draw->instance_id,
temp_buffer );
vsvg->base.vs->run_linear( vsvg->base.vs,
@@ -239,6 +242,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->emit->run( vsvg->emit,
0, count,
+ vsvg->draw->instance_id,
output_buffer );
FREE(temp_buffer);
@@ -281,9 +285,11 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
fetch.nr_elements = key->nr_inputs;
fetch.output_stride = vsvg->temp_vertex_stride;
for (i = 0; i < key->nr_inputs; i++) {
+ fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL;
fetch.element[i].input_format = key->element[i].in.format;
fetch.element[i].input_buffer = key->element[i].in.buffer;
fetch.element[i].input_offset = key->element[i].in.offset;
+ fetch.element[i].instance_divisor = 0;
fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
fetch.element[i].output_offset = i * 4 * sizeof(float);
assert(fetch.element[i].output_offset < fetch.output_stride);
@@ -295,17 +301,21 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
for (i = 0; i < key->nr_outputs; i++) {
if (key->element[i].out.format != EMIT_1F_PSIZE)
{
+ emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
emit.element[i].input_buffer = 0;
emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
+ emit.element[i].instance_divisor = 0;
emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
emit.element[i].output_offset = key->element[i].out.offset;
assert(emit.element[i].input_offset <= fetch.output_stride);
}
else {
+ emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
emit.element[i].input_buffer = 1;
emit.element[i].input_offset = 0;
+ emit.element[i].instance_divisor = 0;
emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
emit.element[i].output_offset = key->element[i].out.offset;
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 1acf3c373e..f675427d98 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -673,6 +673,13 @@ void x86_and( struct x86_function *p,
emit_op_modrm( p, 0x23, 0x21, dst, src );
}
+void x86_div( struct x86_function *p,
+ struct x86_reg src )
+{
+ assert(src.file == file_REG32 && src.mod == mod_REG);
+ emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
+}
+
/***********************************************************************
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 731a651796..f7612d416a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -244,6 +244,7 @@ void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
+void x86_div( struct x86_function *p, struct x86_reg src );
void x86_cdecl_caller_push_regs( struct x86_function *p );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index e2e5394f86..d7ff262f30 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -123,7 +123,8 @@ static const char *semantic_names[] =
"NORMAL",
"FACE",
"EDGEFLAG",
- "PRIM_ID"
+ "PRIM_ID",
+ "INSTANCEID"
};
static const char *immediate_type_names[] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index e64e2b731d..2b51672b8e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -64,6 +64,7 @@ struct ureg_tokens {
};
#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS
+#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS
#define UREG_MAX_CONSTANT_RANGE 32
#define UREG_MAX_IMMEDIATE 32
@@ -95,6 +96,13 @@ struct ureg_program
unsigned nr_gs_inputs;
struct {
+ unsigned index;
+ unsigned semantic_name;
+ unsigned semantic_index;
+ } system_value[UREG_MAX_SYSTEM_VALUE];
+ unsigned nr_system_values;
+
+ struct {
unsigned semantic_name;
unsigned semantic_index;
} output[UREG_MAX_OUTPUT];
@@ -304,6 +312,25 @@ ureg_DECL_gs_input(struct ureg_program *ureg,
}
+struct ureg_src
+ureg_DECL_system_value(struct ureg_program *ureg,
+ unsigned index,
+ unsigned semantic_name,
+ unsigned semantic_index)
+{
+ if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
+ ureg->system_value[ureg->nr_system_values].index = index;
+ ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
+ ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
+ ureg->nr_system_values++;
+ } else {
+ set_bad(ureg);
+ }
+
+ return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index);
+}
+
+
struct ureg_dst
ureg_DECL_output( struct ureg_program *ureg,
unsigned name,
@@ -1058,6 +1085,15 @@ static void emit_decls( struct ureg_program *ureg )
}
}
+ for (i = 0; i < ureg->nr_system_values; i++) {
+ emit_decl(ureg,
+ TGSI_FILE_SYSTEM_VALUE,
+ ureg->system_value[i].index,
+ ureg->system_value[i].semantic_name,
+ ureg->system_value[i].semantic_index,
+ TGSI_INTERPOLATE_CONSTANT);
+ }
+
for (i = 0; i < ureg->nr_outputs; i++) {
emit_decl( ureg,
TGSI_FILE_OUTPUT,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 6f11273320..38e2fd8d0a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -137,6 +137,12 @@ struct ureg_src
ureg_DECL_gs_input(struct ureg_program *,
unsigned index);
+struct ureg_src
+ureg_DECL_system_value(struct ureg_program *,
+ unsigned index,
+ unsigned semantic_name,
+ unsigned semantic_index);
+
struct ureg_dst
ureg_DECL_output( struct ureg_program *,
unsigned semantic_name,
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index 34526eb061..54ed2c1a4b 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -44,12 +44,19 @@
#include "pipe/p_format.h"
#include "pipe/p_state.h"
+enum translate_element_type {
+ TRANSLATE_ELEMENT_NORMAL,
+ TRANSLATE_ELEMENT_INSTANCE_ID
+};
+
struct translate_element
{
+ enum translate_element_type type;
enum pipe_format input_format;
enum pipe_format output_format;
unsigned input_buffer:8;
unsigned input_offset:24;
+ unsigned instance_divisor;
unsigned output_offset;
};
@@ -74,11 +81,13 @@ struct translate {
void (PIPE_CDECL *run_elts)( struct translate *,
const unsigned *elts,
unsigned count,
+ unsigned instance_id,
void *output_buffer);
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,
+ unsigned instance_id,
void *output_buffer);
};
@@ -103,8 +112,13 @@ static INLINE int translate_keysize( const struct translate_key *key )
static INLINE int translate_key_compare( const struct translate_key *a,
const struct translate_key *b )
{
- int keysize = translate_keysize(a);
- return memcmp(a, b, keysize);
+ int keysize_a = translate_keysize(a);
+ int keysize_b = translate_keysize(b);
+
+ if (keysize_a != keysize_b) {
+ return keysize_a - keysize_b;
+ }
+ return memcmp(a, b, keysize_a);
}
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 266e7ee81e..24727d4988 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -46,9 +46,12 @@ struct translate_generic {
struct translate translate;
struct {
+ enum translate_element_type type;
+
fetch_func fetch;
unsigned buffer;
unsigned input_offset;
+ unsigned instance_divisor;
emit_func emit;
unsigned output_offset;
@@ -568,6 +571,7 @@ static emit_func get_emit_func( enum pipe_format format )
static void PIPE_CDECL generic_run_elts( struct translate *translate,
const unsigned *elts,
unsigned count,
+ unsigned instance_id,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
@@ -583,13 +587,20 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
-
- const char *src = (tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * elt);
+ const char *src;
char *dst = (vert +
tg->attrib[attr].output_offset);
+ if (tg->attrib[attr].instance_divisor) {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride *
+ (instance_id / tg->attrib[attr].instance_divisor);
+ } else {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * elt;
+ }
+
tg->attrib[attr].fetch( src, data );
if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
@@ -607,6 +618,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
unsigned count,
+ unsigned instance_id,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
@@ -623,13 +635,25 @@ static void PIPE_CDECL generic_run( struct translate *translate,
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
- const char *src = (tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * elt);
-
char *dst = (vert +
tg->attrib[attr].output_offset);
- tg->attrib[attr].fetch( src, data );
+ if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+ const char *src;
+
+ if (tg->attrib[attr].instance_divisor) {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride *
+ (instance_id / tg->attrib[attr].instance_divisor);
+ } else {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * elt;
+ }
+
+ tg->attrib[attr].fetch( src, data );
+ } else {
+ data[0] = (float)instance_id;
+ }
if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
i, attr, data[0], data[1], data[2], data[3]);
@@ -683,10 +707,12 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->translate.run = generic_run;
for (i = 0; i < key->nr_elements; i++) {
+ tg->attrib[i].type = key->element[i].type;
tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format);
tg->attrib[i].buffer = key->element[i].input_buffer;
tg->attrib[i].input_offset = key->element[i].input_offset;
+ tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
tg->attrib[i].output_offset = key->element[i].output_offset;
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index b62db8d8f3..8e152a002a 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -49,19 +49,31 @@
typedef void (PIPE_CDECL *run_func)( struct translate *translate,
unsigned start,
unsigned count,
- void *output_buffer );
+ unsigned instance_id,
+ void *output_buffer,
+ float instance_id_float );
typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
const unsigned *elts,
unsigned count,
- void *output_buffer );
+ unsigned instance_id,
+ void *output_buffer,
+ float instance_id_float );
struct translate_buffer {
const void *base_ptr;
unsigned stride;
- void *ptr; /* updated per vertex */
};
+struct translate_buffer_varient {
+ unsigned buffer_index;
+ unsigned instance_divisor;
+ void *ptr; /* updated either per vertex or per instance */
+};
+
+
+#define ELEMENT_BUFFER_INSTANCE_ID 1001
+
struct translate_sse {
struct translate translate;
@@ -81,6 +93,17 @@ struct translate_sse {
struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
unsigned nr_buffers;
+ /* Multiple buffer varients can map to a single buffer. */
+ struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS];
+ unsigned nr_buffer_varients;
+
+ /* Multiple elements can map to a single buffer varient. */
+ unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS];
+
+ boolean use_instancing;
+ unsigned instance_id;
+ float instance_id_float; /* XXX: needed while no integer support in TGSI */
+
run_func gen_run;
run_elts_func gen_run_elts;
@@ -359,32 +382,61 @@ static boolean init_inputs( struct translate_sse *p,
boolean linear )
{
unsigned i;
- if (linear) {
- for (i = 0; i < p->nr_buffers; i++) {
+ struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
+ get_offset(p, &p->instance_id));
+
+ for (i = 0; i < p->nr_buffer_varients; i++) {
+ struct translate_buffer_varient *varient = &p->buffer_varient[i];
+ struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
+
+ if (linear || varient->instance_divisor) {
struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[i].stride));
+ get_offset(p, &buffer->stride));
struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[i].ptr));
+ get_offset(p, &varient->ptr));
struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[i].base_ptr));
+ get_offset(p, &buffer->base_ptr));
struct x86_reg elt = p->idx_EBX;
- struct x86_reg tmp = p->tmp_EAX;
-
+ struct x86_reg tmp_EAX = p->tmp_EAX;
/* Calculate pointer to first attrib:
+ * base_ptr + stride * index, where index depends on instance divisor
*/
- x86_mov(p->func, tmp, buf_stride);
- x86_imul(p->func, tmp, elt);
- x86_add(p->func, tmp, buf_base_ptr);
+ if (varient->instance_divisor) {
+ /* Our index is instance ID divided by instance divisor.
+ */
+ x86_mov(p->func, tmp_EAX, instance_id);
+
+ if (varient->instance_divisor != 1) {
+ struct x86_reg tmp_EDX = p->machine_EDX;
+ struct x86_reg tmp_ECX = p->outbuf_ECX;
+
+ /* TODO: Add x86_shr() to rtasm and use it whenever
+ * instance divisor is power of two.
+ */
+
+ x86_push(p->func, tmp_EDX);
+ x86_push(p->func, tmp_ECX);
+ x86_xor(p->func, tmp_EDX, tmp_EDX);
+ x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
+ x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */
+ x86_pop(p->func, tmp_ECX);
+ x86_pop(p->func, tmp_EDX);
+ }
+ } else {
+ x86_mov(p->func, tmp_EAX, elt);
+ }
+ x86_imul(p->func, tmp_EAX, buf_stride);
+ x86_add(p->func, tmp_EAX, buf_base_ptr);
/* In the linear case, keep the buffer pointer instead of the
* index number.
*/
- if (p->nr_buffers == 1)
- x86_mov( p->func, elt, tmp );
+ if (linear && p->nr_buffer_varients == 1)
+ x86_mov(p->func, elt, tmp_EAX);
else
- x86_mov( p->func, buf_ptr, tmp );
+ x86_mov(p->func, buf_ptr, tmp_EAX);
}
}
@@ -394,31 +446,36 @@ static boolean init_inputs( struct translate_sse *p,
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
boolean linear,
- unsigned buf_idx,
+ unsigned var_idx,
struct x86_reg elt )
{
- if (linear && p->nr_buffers == 1) {
+ if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
+ return x86_make_disp(p->machine_EDX,
+ get_offset(p, &p->instance_id_float));
+ }
+ if (linear && p->nr_buffer_varients == 1) {
return p->idx_EBX;
}
- else if (linear) {
+ else if (linear || p->buffer_varient[var_idx].instance_divisor) {
struct x86_reg ptr = p->tmp_EAX;
struct x86_reg buf_ptr =
x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[buf_idx].ptr));
+ get_offset(p, &p->buffer_varient[var_idx].ptr));
x86_mov(p->func, ptr, buf_ptr);
return ptr;
}
else {
struct x86_reg ptr = p->tmp_EAX;
+ const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];
struct x86_reg buf_stride =
x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[buf_idx].stride));
+ get_offset(p, &p->buffer[varient->buffer_index].stride));
struct x86_reg buf_base_ptr =
x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[buf_idx].base_ptr));
+ get_offset(p, &p->buffer[varient->buffer_index].base_ptr));
@@ -436,28 +493,33 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
static boolean incr_inputs( struct translate_sse *p,
boolean linear )
{
- if (linear && p->nr_buffers == 1) {
+ if (linear && p->nr_buffer_varients == 1) {
struct x86_reg stride = x86_make_disp(p->machine_EDX,
get_offset(p, &p->buffer[0].stride));
- x86_add(p->func, p->idx_EBX, stride);
- sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
+ if (p->buffer_varient[0].instance_divisor == 0) {
+ x86_add(p->func, p->idx_EBX, stride);
+ sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
+ }
}
else if (linear) {
unsigned i;
/* Is this worthwhile??
*/
- for (i = 0; i < p->nr_buffers; i++) {
+ for (i = 0; i < p->nr_buffer_varients; i++) {
+ struct translate_buffer_varient *varient = &p->buffer_varient[i];
struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[i].ptr));
+ get_offset(p, &varient->ptr));
struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
- get_offset(p, &p->buffer[i].stride));
+ get_offset(p, &p->buffer[varient->buffer_index].stride));
- x86_mov(p->func, p->tmp_EAX, buf_ptr);
- x86_add(p->func, p->tmp_EAX, buf_stride);
- if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
- x86_mov(p->func, buf_ptr, p->tmp_EAX);
+ if (varient->instance_divisor == 0) {
+ x86_mov(p->func, p->tmp_EAX, buf_ptr);
+ x86_add(p->func, p->tmp_EAX, buf_stride);
+ if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
+ x86_mov(p->func, buf_ptr, p->tmp_EAX);
+ }
}
}
else {
@@ -514,7 +576,26 @@ static boolean build_vertex_emit( struct translate_sse *p,
x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
- x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 4));
+ x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));
+
+ /* Load instance ID.
+ */
+ if (p->use_instancing) {
+ x86_mov(p->func,
+ p->tmp_EAX,
+ x86_fn_arg(p->func, 4));
+ x86_mov(p->func,
+ x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
+ p->tmp_EAX);
+
+ /* XXX: temporary */
+ x86_mov(p->func,
+ p->tmp_EAX,
+ x86_fn_arg(p->func, 6));
+ x86_mov(p->func,
+ x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id_float)),
+ p->tmp_EAX);
+ }
/* Get vertex count, compare to zero
*/
@@ -531,17 +612,18 @@ static boolean build_vertex_emit( struct translate_sse *p,
label = x86_get_label(p->func);
{
struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
- int last_vb = -1;
+ int last_varient = -1;
struct x86_reg vb;
for (j = 0; j < p->translate.key.nr_elements; j++) {
const struct translate_element *a = &p->translate.key.element[j];
+ unsigned varient = p->element_to_buffer_varient[j];
/* Figure out source pointer address:
*/
- if (a->input_buffer != last_vb) {
- last_vb = a->input_buffer;
- vb = get_buffer_ptr(p, linear, a->input_buffer, elt);
+ if (varient != last_varient) {
+ last_varient = varient;
+ vb = get_buffer_ptr(p, linear, varient, elt);
}
if (!translate_attr( p, a,
@@ -624,6 +706,7 @@ static void translate_sse_release( struct translate *translate )
static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
const unsigned *elts,
unsigned count,
+ unsigned instance_id,
void *output_buffer )
{
struct translate_sse *p = (struct translate_sse *)translate;
@@ -631,12 +714,15 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
p->gen_run_elts( translate,
elts,
count,
- output_buffer );
+ instance_id,
+ output_buffer,
+ (float)instance_id );
}
static void PIPE_CDECL translate_sse_run( struct translate *translate,
unsigned start,
unsigned count,
+ unsigned instance_id,
void *output_buffer )
{
struct translate_sse *p = (struct translate_sse *)translate;
@@ -644,7 +730,9 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate,
p->gen_run( translate,
start,
count,
- output_buffer );
+ instance_id,
+ output_buffer,
+ (float)instance_id);
}
@@ -666,8 +754,37 @@ struct translate *translate_sse2_create( const struct translate_key *key )
p->translate.run_elts = translate_sse_run_elts;
p->translate.run = translate_sse_run;
- for (i = 0; i < key->nr_elements; i++)
- p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 );
+ for (i = 0; i < key->nr_elements; i++) {
+ if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
+ unsigned j;
+
+ p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);
+
+ if (key->element[i].instance_divisor) {
+ p->use_instancing = TRUE;
+ }
+
+ /*
+ * Map vertex element to vertex buffer varient.
+ */
+ for (j = 0; j < p->nr_buffer_varients; j++) {
+ if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
+ p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
+ break;
+ }
+ }
+ if (j == p->nr_buffer_varients) {
+ p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
+ p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
+ p->nr_buffer_varients++;
+ }
+ p->element_to_buffer_varient[i] = j;
+ } else {
+ assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);
+
+ p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID;
+ }
+ }
if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);