diff options
Diffstat (limited to 'src/gallium/auxiliary')
53 files changed, 1860 insertions, 1001 deletions
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 7c7702549e..dab95e5051 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -435,13 +435,18 @@ draw_num_shader_outputs(const struct draw_context *draw) */ void draw_texture_samplers(struct draw_context *draw, + uint shader, uint num_samplers, struct tgsi_sampler **samplers) { - draw->vs.num_samplers = num_samplers; - draw->vs.samplers = samplers; - draw->gs.num_samplers = num_samplers; - draw->gs.samplers = samplers; + if (shader == PIPE_SHADER_VERTEX) { + draw->vs.num_samplers = num_samplers; + draw->vs.samplers = samplers; + } else { + debug_assert(shader == PIPE_SHADER_GEOMETRY); + draw->gs.num_samplers = num_samplers; + draw->gs.samplers = samplers; + } } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 103d6538b8..c0122f2aca 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -97,6 +97,7 @@ draw_num_shader_outputs(const struct draw_context *draw); void draw_texture_samplers(struct draw_context *draw, + uint shader_type, uint num_samplers, struct tgsi_sampler **samplers); diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index a1ca7071e3..79a57a67f3 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -39,7 +39,6 @@ #include "util/u_memory.h" #include "util/u_prim.h" -#define MAX_PRIM_VERTICES 6 /* fixme: move it from here */ #define MAX_PRIMITIVES 64 @@ -76,6 +75,7 @@ draw_gs_set_constants(struct draw_context *draw, const void *constants, unsigned size) { + /* noop */ } @@ -171,9 +171,10 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, /* Unswizzle all output results. 
*/ - shader->emitted_primitives += num_primitives; for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { unsigned num_verts_per_prim = machine->Primitives[prim_idx]; + shader->primitive_lengths[prim_idx + shader->emitted_primitives] = + machine->Primitives[prim_idx]; shader->emitted_vertices += num_verts_per_prim; for (j = 0; j < num_verts_per_prim; j++) { int idx = (prim_idx * num_verts_per_prim + j) * @@ -199,9 +200,10 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, } } *p_output = output; + shader->emitted_primitives += num_primitives; } - +/*#define DEBUG_INPUTS 1*/ static void draw_fetch_gs_input(struct draw_geometry_shader *shader, unsigned *indices, unsigned num_vertices, @@ -216,19 +218,28 @@ static void draw_fetch_gs_input(struct draw_geometry_shader *shader, for (i = 0; i < num_vertices; ++i) { const float (*input)[4]; - /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], prim_idx);*/ +#if DEBUG_INPUTS + debug_printf("%d) vertex index = %d (prim idx = %d)\n", + i, indices[i], prim_idx); +#endif input = (const float (*)[4])( (const char *)input_ptr + (indices[i] * input_vertex_stride)); for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { - machine->Inputs[idx].xyzw[0].f[prim_idx] = (float)shader->in_prim_idx; - machine->Inputs[idx].xyzw[1].f[prim_idx] = (float)shader->in_prim_idx; - machine->Inputs[idx].xyzw[2].f[prim_idx] = (float)shader->in_prim_idx; - machine->Inputs[idx].xyzw[3].f[prim_idx] = (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[0].f[prim_idx] = + (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[1].f[prim_idx] = + (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[2].f[prim_idx] = + (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[3].f[prim_idx] = + (float)shader->in_prim_idx; } else { - /*debug_printf("\tSlot = %d, vs_slot = 
%d, idx = %d:\n", - slot, vs_slot, idx);*/ +#if DEBUG_INPUTS + debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", + slot, vs_slot, idx); +#endif #if 1 assert(!util_is_inf_or_nan(input[vs_slot][0])); assert(!util_is_inf_or_nan(input[vs_slot][1])); @@ -239,7 +250,7 @@ static void draw_fetch_gs_input(struct draw_geometry_shader *shader, machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1]; machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2]; machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3]; -#if 0 +#if DEBUG_INPUTS debug_printf("\t\t%f %f %f %f\n", machine->Inputs[idx].xyzw[0].f[prim_idx], machine->Inputs[idx].xyzw[1].f[prim_idx], @@ -252,7 +263,6 @@ static void draw_fetch_gs_input(struct draw_geometry_shader *shader, } } - static void gs_flush(struct draw_geometry_shader *shader, unsigned input_primitives) { @@ -274,6 +284,11 @@ static void gs_flush(struct draw_geometry_shader *shader, out_prim_count = machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0]; +#if 0 + debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n", + shader->emitted_primitives, shader->emitted_vertices, + out_prim_count); +#endif draw_geometry_fetch_outputs(shader, out_prim_count, &shader->tmp_output); } @@ -305,6 +320,22 @@ static void gs_line(struct draw_geometry_shader *shader, gs_flush(shader, 1); } +static void gs_line_adj(struct draw_geometry_shader *shader, + int i0, int i1, int i2, int i3) +{ + unsigned indices[4]; + + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + + draw_fetch_gs_input(shader, indices, 4, 0); + ++shader->in_prim_idx; + + gs_flush(shader, 1); +} + static void gs_tri(struct draw_geometry_shader *shader, int i0, int i1, int i2) { @@ -320,58 +351,130 @@ static void gs_tri(struct draw_geometry_shader *shader, gs_flush(shader, 1); } +static void gs_tri_adj(struct draw_geometry_shader *shader, + int i0, int i1, int i2, + int i3, int i4, int i5) +{ + unsigned indices[6]; + 
+ indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + indices[4] = i4; + indices[5] = i5; + + draw_fetch_gs_input(shader, indices, 6, 0); + ++shader->in_prim_idx; + + gs_flush(shader, 1); +} + #define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,i0,i1,i2) -#define LINE(gs,i0,i1) gs_line(gs,i0,i1) +#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5) +#define LINE(gs,i0,i1) gs_line(gs,i0,i1) +#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3) #define POINT(gs,i0) gs_point(gs,i0) #define FUNC gs_run +#define LOCAL_VARS +#include "draw_gs_tmp.h" + + +#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,elts[i0],elts[i1],elts[i2]) +#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) \ + gs_tri_adj(gs,elts[i0],elts[i1],elts[i2],elts[i3], \ + elts[i4],elts[i5]) +#define LINE(gs,i0,i1) gs_line(gs,elts[i0],elts[i1]) +#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,elts[i0], \ + elts[i1], \ + elts[i2],elts[i3]) +#define POINT(gs,i0) gs_point(gs,elts[i0]) +#define FUNC gs_run_elts +#define LOCAL_VARS \ + const ushort *elts = input_prims->elts; #include "draw_gs_tmp.h" int draw_geometry_shader_run(struct draw_geometry_shader *shader, - unsigned pipe_prim, - const float (*input)[4], - float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], - unsigned count, - unsigned input_stride, - unsigned vertex_size) + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const struct draw_vertex_info *input_verts, + const struct draw_prim_info *input_prim, + struct draw_vertex_info *output_verts, + struct draw_prim_info *output_prims ) { + const float (*input)[4] = (const float (*)[4])input_verts->verts->data; + unsigned input_stride = input_verts->vertex_size; + unsigned vertex_size = input_verts->vertex_size; struct tgsi_exec_machine *machine = shader->machine; unsigned int i; + unsigned num_input_verts = input_prim->linear ? 
+ input_verts->count : + input_prim->count; unsigned num_in_primitives = - u_gs_prims_for_vertices(pipe_prim, count); - unsigned alloc_count = draw_max_output_vertices(shader->draw, - pipe_prim, - count); - /* this is bad, but we can't be overwriting the output array - * because it's the same as input array here */ - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(vertex_size * alloc_count); + MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts), + u_gs_prims_for_vertices(shader->input_primitive, num_input_verts)); + unsigned max_out_prims = u_gs_prims_for_vertices(shader->output_primitive, + shader->max_output_vertices) + * num_in_primitives; + + output_verts->vertex_size = input_verts->vertex_size; + output_verts->stride = input_verts->vertex_size; + output_verts->verts = + (struct vertex_header *)MALLOC(input_verts->vertex_size * + num_in_primitives * + shader->max_output_vertices); - if (!pipeline_verts) - return 0; - if (0) debug_printf("%s count = %d (prims = %d)\n", __FUNCTION__, - count, num_in_primitives); +#if 0 + debug_printf("%s count = %d (in prims # = %d)\n", + __FUNCTION__, num_input_verts, num_in_primitives); + debug_printf("\tlinear = %d, prim_info->count = %d\n", + input_prim->linear, input_prim->count); + debug_printf("\tprimt pipe = %d, shader in = %d, shader out = %d, max out = %d\n", + input_prim->prim, shader->input_primitive, + shader->output_primitive, + shader->max_output_vertices); +#endif shader->emitted_vertices = 0; shader->emitted_primitives = 0; shader->vertex_size = vertex_size; - shader->tmp_output = ( float (*)[4])pipeline_verts->data; + shader->tmp_output = (float (*)[4])output_verts->verts->data; shader->in_prim_idx = 0; shader->input_vertex_stride = input_stride; shader->input = input; + if (shader->primitive_lengths) { + FREE(shader->primitive_lengths); + } + shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 
machine->Consts[i] = constants[i]; } - gs_run(shader, pipe_prim, count); + if (input_prim->linear) + gs_run(shader, input_prim, input_verts, + output_prims, output_verts); + else + gs_run_elts(shader, input_prim, input_verts, + output_prims, output_verts); - memcpy(output, pipeline_verts->data, - shader->info.num_outputs * 4 * sizeof(float) + - vertex_size * (shader->emitted_vertices -1)); + /* Update prim_info: + */ + output_prims->linear = TRUE; + output_prims->elts = NULL; + output_prims->start = 0; + output_prims->count = shader->emitted_vertices; + output_prims->prim = shader->output_primitive; + output_prims->primitive_lengths = shader->primitive_lengths; + output_prims->primitive_count = shader->emitted_primitives; + output_verts->count = shader->emitted_vertices; + +#if 0 + debug_printf("GS finished, prims = %d, verts = %d\n", + output_prims->primitive_count, + output_verts->count); +#endif - FREE(pipeline_verts); return shader->emitted_vertices; } @@ -391,24 +494,3 @@ void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, draw->gs.samplers); } } - -int draw_max_output_vertices(struct draw_context *draw, - unsigned pipe_prim, - unsigned count) -{ - unsigned alloc_count = align( count, 4 ); - - if (draw->gs.geometry_shader) { - unsigned input_primitives = u_gs_prims_for_vertices(pipe_prim, - count); - /* max GS output is number of input primitives * max output - * vertices per each invocation */ - unsigned gs_max_verts = input_primitives * - draw->gs.geometry_shader->max_output_vertices; - if (gs_max_verts > count) - alloc_count = align(gs_max_verts, 4); - } - /*debug_printf("------- alloc count = %d (input = %d)\n", - alloc_count, count);*/ - return alloc_count; -} diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 65f0c61916..2cb634818c 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -54,6 +54,7 @@ struct draw_geometry_shader { unsigned 
input_primitive; unsigned output_primitive; + unsigned *primitive_lengths; unsigned emitted_vertices; unsigned emitted_primitives; @@ -71,13 +72,11 @@ struct draw_geometry_shader { * smaller than the GS_MAX_OUTPUT_VERTICES shader property. */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, - unsigned pipe_prim, - const float (*input)[4], - float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], - unsigned count, - unsigned input_stride, - unsigned output_stride); + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const struct draw_vertex_info *input_verts, + const struct draw_prim_info *input_prim, + struct draw_vertex_info *output_verts, + struct draw_prim_info *output_prims ); void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h index eb4a313ca1..7a8683cf7c 100644 --- a/src/gallium/auxiliary/draw/draw_gs_tmp.h +++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h @@ -1,18 +1,23 @@ static void FUNC( struct draw_geometry_shader *shader, - unsigned pipe_prim, - unsigned count ) + const struct draw_prim_info *input_prims, + const struct draw_vertex_info *input_verts, + struct draw_prim_info *output_prims, + struct draw_vertex_info *output_verts) { struct draw_context *draw = shader->draw; boolean flatfirst = (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first); - unsigned i; + unsigned i, j; + unsigned count = input_prims->count; + LOCAL_VARS if (0) debug_printf("%s %d\n", __FUNCTION__, count); + debug_assert(input_prims->primitive_count == 1); - switch (pipe_prim) { + switch (input_prims->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < count; i++) { POINT( shader, i + 0 ); @@ -90,20 +95,6 @@ static void FUNC( struct draw_geometry_shader *shader, case PIPE_PRIM_POLYGON: { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade 
requirements. - */ - ushort edge_next, edge_finish; - - if (flatfirst) { - edge_next = DRAW_PIPE_EDGE_FLAG_2; - edge_finish = DRAW_PIPE_EDGE_FLAG_0; - } - else { - edge_next = DRAW_PIPE_EDGE_FLAG_0; - edge_finish = DRAW_PIPE_EDGE_FLAG_1; - } - for (i = 0; i+2 < count; i++) { if (flatfirst) { @@ -116,14 +107,46 @@ static void FUNC( struct draw_geometry_shader *shader, } break; + case PIPE_PRIM_LINES_ADJACENCY: + for (i = 0; i+3 < count; i += 4) { + LINE_ADJ( shader , i + 0 , i + 1, i + 2, i + 3 ); + } + break; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + for (i = 1; i + 2 < count; i++) { + LINE_ADJ( shader, i - 1, i, i + 1, i + 2 ); + } + break; + + case PIPE_PRIM_TRIANGLES_ADJACENCY: + for (i = 0; i+5 < count; i += 5) { + TRI_ADJ( shader, i + 0, i + 1, i + 2, + i + 3, i + 4, i + 5); + } + break; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + for (i = 0, j = 0; i+5 < count; i += 2, ++j) { + TRI_ADJ( shader, + i + 0, + i + 1 + 2*(j&1), + i + 2 + 2*(j&1), + i + 3 - 2*(j&1), + i + 4 - 2*(j&1), + i + 5); + } + break; + default: - assert(0); + debug_assert(!"Unsupported primitive in geometry shader"); break; } } #undef TRIANGLE +#undef TRI_ADJ #undef POINT #undef LINE +#undef LINE_ADJ #undef FUNC +#undef LOCAL_VARS diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index bd5d8853cf..9117c1303d 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -173,6 +173,8 @@ draw_llvm_create(struct draw_context *draw) #endif llvm = CALLOC_STRUCT( draw_llvm ); + if (!llvm) + return NULL; llvm->draw = draw; llvm->engine = draw->engine; diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 7ea04e3819..a8b9dc6014 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -177,15 +177,15 @@ static void do_triangle( struct draw_context *draw, ( DRAW_PIPE_RESET_STIPPLE | \ DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ 
- verts + stride * elts[i0], \ - verts + stride * elts[i1], \ - verts + stride * elts[i2]); \ + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_1 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i2], \ - verts + stride * elts[i3]) + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) /* emit last quad vertex as last vertex in triangles */ #define QUAD_LAST_PV(i0,i1,i2,i3) \ @@ -193,15 +193,15 @@ static void do_triangle( struct draw_context *draw, ( DRAW_PIPE_RESET_STIPPLE | \ DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i1], \ - verts + stride * elts[i3]); \ + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * elts[i1], \ - verts + stride * elts[i2], \ - verts + stride * elts[i3]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ @@ -218,7 +218,7 @@ static void do_triangle( struct draw_context *draw, #define POINT(i0) \ do_point( draw, \ - verts + stride * elts[i0] ) + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) ) #define FUNC pipe_run #define ARGS \ @@ -256,27 +256,34 @@ static void do_triangle( struct draw_context *draw, * draw_vbuf.c code uses when it has to perform a flush. 
*/ void draw_pipeline_run( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { - char *verts = (char *)vertices; - - draw->pipeline.verts = verts; - draw->pipeline.vertex_stride = stride; - draw->pipeline.vertex_count = vertex_count; - - pipe_run(draw, prim, vertices, stride, elts, count); + unsigned i, start; + draw->pipeline.verts = (char *)vert_info->verts; + draw->pipeline.vertex_stride = vert_info->stride; + draw->pipeline.vertex_count = vert_info->count; + + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + unsigned count = prim_info->primitive_lengths[i]; + + pipe_run(draw, + prim_info->prim, + vert_info->verts, + vert_info->stride, + prim_info->elts + start, + count); + } + draw->pipeline.verts = NULL; draw->pipeline.vertex_count = 0; } - /* * Set up macros for draw_pt_decompose.h template code. * This code is for non-indexed (aka linear) rendering (no elts). 
@@ -289,14 +296,14 @@ void draw_pipeline_run( struct draw_context *draw, DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2)); \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_1 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2), \ - verts + stride * (i3)) + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) /* emit last quad vertex as last vertex in triangles */ #define QUAD_LAST_PV(i0,i1,i2,i3) \ @@ -305,31 +312,31 @@ void draw_pipeline_run( struct draw_context *draw, DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i3)); \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2), \ - verts + stride * (i3)) + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ flags, /* flags */ \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2)) + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)) #define LINE(flags,i0,i1) \ do_line( draw, \ flags, \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1)) + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK)) #define POINT(i0) \ do_point( draw, \ - verts + stride * i0 ) + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) ) #define FUNC pipe_run_linear #define ARGS \ @@ -354,17 +361,29 @@ void draw_pipeline_run( struct 
draw_context *draw, * For drawing non-indexed primitives. */ void draw_pipeline_run_linear( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { - char *verts = (char *)vertices; - draw->pipeline.verts = verts; - draw->pipeline.vertex_stride = stride; - draw->pipeline.vertex_count = count; - - pipe_run_linear(draw, prim, vertices, stride, count); + unsigned i, start; + + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + unsigned count = prim_info->primitive_lengths[i]; + char *verts = ((char*)vert_info->verts) + + (start * vert_info->stride); + + draw->pipeline.verts = verts; + draw->pipeline.vertex_stride = vert_info->stride; + draw->pipeline.vertex_count = count; + + pipe_run_linear(draw, + prim_info->prim, + (struct vertex_header*)verts, + vert_info->stride, + count); + } draw->pipeline.verts = NULL; draw->pipeline.vertex_count = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 4faf0a779c..debd17fd74 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -788,9 +788,6 @@ draw_aaline_stage(struct draw_context *draw) if (aaline == NULL) return NULL; - if (!draw_alloc_temp_verts( &aaline->stage, 8 )) - goto fail; - aaline->stage.draw = draw; aaline->stage.name = "aaline"; aaline->stage.next = NULL; @@ -801,11 +798,14 @@ draw_aaline_stage(struct draw_context *draw) aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; aaline->stage.destroy = aaline_destroy; + if (!draw_alloc_temp_verts( &aaline->stage, 8 )) + goto fail; + return aaline; fail: if (aaline) - aaline_destroy(&aaline->stage); + aaline->stage.destroy(&aaline->stage); return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c 
b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index bba6f50c02..d406a86ccb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -780,9 +780,6 @@ draw_aapoint_stage(struct draw_context *draw) if (aapoint == NULL) goto fail; - if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) - goto fail; - aapoint->stage.draw = draw; aapoint->stage.name = "aapoint"; aapoint->stage.next = NULL; @@ -793,11 +790,14 @@ draw_aapoint_stage(struct draw_context *draw) aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; aapoint->stage.destroy = aapoint_destroy; + if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) + goto fail; + return aapoint; fail: if (aapoint) - aapoint_destroy(&aapoint->stage); + aapoint->stage.destroy(&aapoint->stage); return NULL; diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index df8d82e367..122b1c7968 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -522,9 +522,6 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw ) if (clipper == NULL) goto fail; - if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 )) - goto fail; - clipper->stage.draw = draw; clipper->stage.name = "clipper"; clipper->stage.point = clip_point; @@ -536,6 +533,9 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw ) clipper->plane = draw->plane; + if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 )) + goto fail; + return &clipper->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index bf84ce30ed..2f4d01d23a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -129,9 +129,6 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw ) if (cull == NULL) goto fail; - if (!draw_alloc_temp_verts( &cull->stage, 0 )) - goto 
fail; - cull->stage.draw = draw; cull->stage.name = "cull"; cull->stage.next = NULL; @@ -142,6 +139,9 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw ) cull->stage.reset_stipple_counter = cull_reset_stipple_counter; cull->stage.destroy = cull_destroy; + if (!draw_alloc_temp_verts( &cull->stage, 0 )) + goto fail; + return &cull->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 34afb1a0b6..693f2895aa 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -257,9 +257,6 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) if (flatshade == NULL) goto fail; - if (!draw_alloc_temp_verts( &flatshade->stage, 2 )) - goto fail; - flatshade->stage.draw = draw; flatshade->stage.name = "flatshade"; flatshade->stage.next = NULL; @@ -270,6 +267,9 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter; flatshade->stage.destroy = flatshade_destroy; + if (!draw_alloc_temp_verts( &flatshade->stage, 2 )) + goto fail; + return &flatshade->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 8e321946ce..8afbbfa156 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -161,9 +161,7 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw ) { struct offset_stage *offset = CALLOC_STRUCT(offset_stage); if (offset == NULL) - return NULL; - - draw_alloc_temp_verts( &offset->stage, 3 ); + goto fail; offset->stage.draw = draw; offset->stage.name = "offset"; @@ -175,5 +173,14 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw ) offset->stage.reset_stipple_counter = offset_reset_stipple_counter; offset->stage.destroy = offset_destroy; + if (!draw_alloc_temp_verts( 
&offset->stage, 3 )) + goto fail; + return &offset->stage; + +fail: + if (offset) + offset->stage.destroy( &offset->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index ef30db094f..fff960c7eb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -607,8 +607,8 @@ static struct pstip_stage * draw_pstip_stage(struct draw_context *draw) { struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); - - draw_alloc_temp_verts( &pstip->stage, 8 ); + if (pstip == NULL) + goto fail; pstip->stage.draw = draw; pstip->stage.name = "pstip"; @@ -620,7 +620,16 @@ draw_pstip_stage(struct draw_context *draw) pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; pstip->stage.destroy = pstip_destroy; + if (!draw_alloc_temp_verts( &pstip->stage, 8 )) + goto fail; + return pstip; + +fail: + if (pstip) + pstip->stage.destroy( &pstip->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 70fbab9ea7..4b3f4e7ae1 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -235,8 +235,8 @@ stipple_destroy( struct draw_stage *stage ) struct draw_stage *draw_stipple_stage( struct draw_context *draw ) { struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage); - - draw_alloc_temp_verts( &stipple->stage, 2 ); + if (stipple == NULL) + goto fail; stipple->stage.draw = draw; stipple->stage.name = "stipple"; @@ -248,5 +248,14 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw ) stipple->stage.flush = stipple_flush; stipple->stage.destroy = stipple_destroy; + if (!draw_alloc_temp_verts( &stipple->stage, 2 )) + goto fail; + return &stipple->stage; + +fail: + if (stipple) + stipple->stage.destroy( &stipple->stage ); + + return NULL; } diff --git 
a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 808b2fb0b5..9a3f3fee62 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -177,9 +177,6 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw ) if (twoside == NULL) goto fail; - if (!draw_alloc_temp_verts( &twoside->stage, 3 )) - goto fail; - twoside->stage.draw = draw; twoside->stage.name = "twoside"; twoside->stage.next = NULL; @@ -190,6 +187,9 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw ) twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter; twoside->stage.destroy = twoside_destroy; + if (!draw_alloc_temp_verts( &twoside->stage, 3 )) + goto fail; + return &twoside->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index e333d26a93..d87741b91e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -202,9 +202,6 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) if (unfilled == NULL) goto fail; - if (!draw_alloc_temp_verts( &unfilled->stage, 0 )) - goto fail; - unfilled->stage.draw = draw; unfilled->stage.name = "unfilled"; unfilled->stage.next = NULL; @@ -216,6 +213,9 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter; unfilled->stage.destroy = unfilled_destroy; + if (!draw_alloc_temp_verts( &unfilled->stage, 0 )) + goto fail; + return &unfilled->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index d7ac95b740..98da9cfb99 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -202,8 +202,8 @@ static void wideline_destroy( struct draw_stage *stage ) struct 
draw_stage *draw_wide_line_stage( struct draw_context *draw ) { struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage); - - draw_alloc_temp_verts( &wide->stage, 4 ); + if (wide == NULL) + goto fail; wide->stage.draw = draw; wide->stage.name = "wide-line"; @@ -215,5 +215,14 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; wide->stage.destroy = wideline_destroy; + if (!draw_alloc_temp_verts( &wide->stage, 4 )) + goto fail; + return &wide->stage; + +fail: + if (wide) + wide->stage.destroy( &wide->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index a86fe19586..3e6e538995 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -324,9 +324,6 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) if (wide == NULL) goto fail; - if (!draw_alloc_temp_verts( &wide->stage, 4 )) - goto fail; - wide->stage.draw = draw; wide->stage.name = "wide-point"; wide->stage.next = NULL; @@ -337,6 +334,9 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; wide->stage.destroy = widepoint_destroy; + if (!draw_alloc_temp_verts( &wide->stage, 4 )) + goto fail; + return &wide->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index fe867ff8e2..4584033bc2 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -265,6 +265,34 @@ struct draw_context void *driver_private; }; + +struct draw_fetch_info { + boolean linear; + unsigned start; + const unsigned *elts; + unsigned count; +}; + +struct draw_vertex_info { + struct vertex_header *verts; + unsigned vertex_size; + unsigned stride; + unsigned count; +}; + +struct draw_prim_info 
{ + boolean linear; + unsigned start; + + const ushort *elts; + unsigned count; + + unsigned prim; + unsigned *primitive_lengths; + unsigned primitive_count; +}; + + /******************************************************************************* * Draw common initialization code */ @@ -342,18 +370,13 @@ void draw_pipeline_destroy( struct draw_context *draw ); #define DRAW_PIPE_FLAG_MASK (0xf<<12) void draw_pipeline_run( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ); + const struct draw_vertex_info *vert, + const struct draw_prim_info *prim); void draw_pipeline_run_linear( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned count, - unsigned stride ); + const struct draw_vertex_info *vert, + const struct draw_prim_info *prim); + @@ -380,9 +403,4 @@ draw_get_rasterizer_no_cull( struct draw_context *draw, boolean flatshade ); -int draw_max_output_vertices(struct draw_context *draw, - unsigned pipe_prim, - unsigned count); - - #endif /* DRAW_PRIVATE_H */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 02c97fec81..6234272d6c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -69,7 +69,6 @@ draw_pt_arrays(struct draw_context *draw, struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; unsigned opt = 0; - unsigned out_prim = prim; /* Sanitize primitive length: */ @@ -80,18 +79,19 @@ draw_pt_arrays(struct draw_context *draw, if (count < first) return TRUE; } - if (draw->gs.geometry_shader) { - out_prim = draw->gs.geometry_shader->output_primitive; - } if (!draw->force_passthrough) { + unsigned gs_out_prim = (draw->gs.geometry_shader ? 
+ draw->gs.geometry_shader->output_primitive : + prim); + if (!draw->render) { opt |= PT_PIPELINE; } if (draw_need_pipeline(draw, draw->rasterizer, - out_prim)) { + gs_out_prim)) { opt |= PT_PIPELINE; } @@ -122,7 +122,7 @@ draw_pt_arrays(struct draw_context *draw, frontend = draw->pt.front.varray; } - frontend->prepare( frontend, prim, out_prim, middle, opt ); + frontend->prepare( frontend, prim, middle, opt ); frontend->run(frontend, draw_pt_elt_func(draw), diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 67ae70fdaf..44356fba4c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -39,6 +39,8 @@ typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); struct draw_pt_middle_end; struct draw_context; +struct draw_prim_info; +struct draw_vertex_info; #define PT_SHADE 0x1 @@ -60,8 +62,7 @@ struct draw_context; */ struct draw_pt_front_end { void (*prepare)( struct draw_pt_front_end *, - unsigned input_prim, - unsigned output_prim, + unsigned prim, struct draw_pt_middle_end *, unsigned opt ); @@ -85,8 +86,7 @@ struct draw_pt_front_end { */ struct draw_pt_middle_end { void (*prepare)( struct draw_pt_middle_end *, - unsigned input_prim, - unsigned output_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ); @@ -164,16 +164,12 @@ void draw_pt_emit_prepare( struct pt_emit *emit, unsigned *max_vertices ); void draw_pt_emit( struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ); + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info); void draw_pt_emit_linear( struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned stride, - unsigned count ); + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info); void draw_pt_emit_destroy( struct pt_emit *emit ); @@ -184,13 +180,11 @@ struct pt_emit *draw_pt_emit_create( struct 
draw_context *draw ); */ struct pt_so_emit; -void draw_pt_so_emit_prepare( struct pt_so_emit *emit, - unsigned prim ); +void draw_pt_so_emit_prepare( struct pt_so_emit *emit ); void draw_pt_so_emit( struct pt_so_emit *emit, - const float (*vertex_data)[4], - unsigned vertex_count, - unsigned stride ); + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info ); void draw_pt_so_emit_destroy( struct pt_so_emit *emit ); @@ -226,9 +220,7 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ); struct pt_post_vs; boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, - struct vertex_header *pipeline_verts, - unsigned stride, - unsigned count ); + struct draw_vertex_info *info ); void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, boolean bypass_clipping, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index f623c0743d..0229bcc7fe 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -127,15 +127,17 @@ void draw_pt_emit_prepare( struct pt_emit *emit, void draw_pt_emit( struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { + const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data; + unsigned vertex_count = vert_info->count; + unsigned stride = vert_info->stride; + const ushort *elts = prim_info->elts; struct draw_context *draw = emit->draw; struct translate *translate = emit->translate; struct vbuf_render *render = draw->render; + unsigned start, i; void *hw_verts; /* XXX: need to flush to get prim_vbuf.c to release its allocation?? 
@@ -190,23 +192,31 @@ void draw_pt_emit( struct pt_emit *emit, 0, vertex_count - 1 ); - render->draw_elements(render, - elts, - count); + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + render->draw_elements(render, + elts + start, + prim_info->primitive_lengths[i]); + } render->release_vertices(render); } void draw_pt_emit_linear(struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned stride, - unsigned count) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { + const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data; + unsigned stride = vert_info->stride; + unsigned count = vert_info->count; struct draw_context *draw = emit->draw; struct translate *translate = emit->translate; struct vbuf_render *render = draw->render; void *hw_verts; + unsigned start, i; #if 0 debug_printf("Linear emit\n"); @@ -258,7 +268,14 @@ void draw_pt_emit_linear(struct pt_emit *emit, render->unmap_vertices( render, 0, count - 1 ); - render->draw_arrays(render, 0, count); + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + render->draw_arrays(render, + start, + prim_info->primitive_lengths[i]); + } render->release_vertices(render); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index a1347221b5..bf799db352 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -29,7 +29,6 @@ #include "util/u_math.h" #include "draw/draw_context.h" #include "draw/draw_private.h" -#include "draw/draw_vbuf.h" #include "draw/draw_pt.h" #include "translate/translate.h" #include "translate/translate_cache.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index c629d55563..5c8af17c8e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ 
b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -36,6 +36,7 @@ #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_gs.h" #include "translate/translate.h" #include "translate/translate_cache.h" @@ -90,7 +91,6 @@ struct fetch_emit_middle_end { static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned prim, - unsigned out_prim, unsigned opt, unsigned *max_vertices ) { @@ -101,9 +101,14 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, boolean ok; struct translate_key key; + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + + ok = draw->render->set_primitive( draw->render, - out_prim ); + gs_out_prim ); if (!ok) { assert(0); return; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 5483a25f1d..b8270280b6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -68,8 +68,7 @@ struct fetch_shade_emit { static void fse_prepare( struct draw_pt_middle_end *middle, - unsigned in_prim, - unsigned out_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ) { @@ -80,9 +79,12 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned nr_vbs = 0; + /* Can't support geometry shader on this path. 
+ */ + assert(!draw->gs.geometry_shader); if (!draw->render->set_primitive( draw->render, - out_prim )) { + prim )) { assert(0); return; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 2301e542aa..24c538b099 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -48,13 +48,11 @@ struct fetch_pipeline_middle_end { unsigned vertex_data_offset; unsigned vertex_size; unsigned input_prim; - unsigned output_prim; unsigned opt; }; static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - unsigned in_prim, - unsigned out_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ) { @@ -64,6 +62,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned instance_id_index = ~0; + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. */ @@ -79,8 +81,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, } } - fpme->input_prim = in_prim; - fpme->output_prim = out_prim; + fpme->input_prim = prim; fpme->opt = opt; /* Always leave room for the vertex header whether we need it or @@ -102,13 +103,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, (boolean)draw->bypass_clipping, (boolean)draw->identity_viewport, (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? true : false) ); + (draw->vs.edgeflag_output ? 
TRUE : FALSE) ); - draw_pt_so_emit_prepare( fpme->so_emit, out_prim ); + draw_pt_so_emit_prepare( fpme->so_emit ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, - out_prim, + gs_out_prim, max_vertices ); *max_vertices = MAX2( *max_vertices, @@ -127,173 +128,146 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, } - -static void fetch_pipeline_run( struct draw_pt_middle_end *middle, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) +static void fetch( struct pt_fetch *fetch, + const struct draw_fetch_info *fetch_info, + char *output) { - struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vshader = draw->vs.vertex_shader; - struct draw_geometry_shader *gshader = draw->gs.geometry_shader; - unsigned opt = fpme->opt; - struct vertex_header *pipeline_verts; - unsigned alloc_count = draw_max_output_vertices(draw, - fpme->input_prim, - fetch_count); - - pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. - */ - assert(0); - return; + if (fetch_info->linear) { + draw_pt_fetch_run_linear( fetch, + fetch_info->start, + fetch_info->count, + output ); } - - /* Fetch into our vertex buffer - */ - draw_pt_fetch_run( fpme->fetch, - fetch_elts, - fetch_count, - (char *)pipeline_verts ); - - /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. 
- */ - if (opt & PT_SHADE) - { - vshader->run_linear(vshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - if (gshader) { - fetch_count = - draw_geometry_shader_run(gshader, - fpme->input_prim, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - debug_assert(fetch_count <= alloc_count); - } + else { + draw_pt_fetch_run( fetch, + fetch_info->elts, + fetch_info->count, + output ); } +} - /* stream output needs to be done before clipping */ - draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - fetch_count, - fpme->vertex_size ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - fetch_count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } - /* Do we need to run the pipeline? - */ - if (opt & PT_PIPELINE) { +static void pipeline(struct fetch_pipeline_middle_end *fpme, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) + draw_pipeline_run_linear( fpme->draw, + vert_info, + prim_info); + else draw_pipeline_run( fpme->draw, - fpme->output_prim, - pipeline_verts, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + vert_info, + prim_info ); +} + +static void emit(struct pt_emit *emit, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) { + draw_pt_emit_linear(emit, vert_info, prim_info); } else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + draw_pt_emit(emit, vert_info, prim_info); } +} - FREE(pipeline_verts); +static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const struct 
draw_vertex_info *input_verts, + struct draw_vertex_info *output_verts ) +{ + output_verts->vertex_size = input_verts->vertex_size; + output_verts->stride = input_verts->vertex_size; + output_verts->count = input_verts->count; + output_verts->verts = + (struct vertex_header *)MALLOC(output_verts->vertex_size * + output_verts->count); + + vshader->run_linear(vshader, + (const float (*)[4])input_verts->verts->data, + ( float (*)[4])output_verts->verts->data, + constants, + input_verts->count, + input_verts->vertex_size, + input_verts->vertex_size); } - -static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, - unsigned start, - unsigned count) +static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, + const struct draw_fetch_info *fetch_info, + const struct draw_prim_info *prim_info ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vs.vertex_shader; - struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; + struct draw_prim_info gs_prim_info; + struct draw_vertex_info fetched_vert_info; + struct draw_vertex_info vs_vert_info; + struct draw_vertex_info gs_vert_info; + struct draw_vertex_info *vert_info; unsigned opt = fpme->opt; - struct vertex_header *pipeline_verts; - unsigned alloc_count = draw_max_output_vertices(draw, - fpme->input_prim, - count); - pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. 
- */ + fetched_vert_info.count = fetch_info->count; + fetched_vert_info.vertex_size = fpme->vertex_size; + fetched_vert_info.stride = fpme->vertex_size; + fetched_vert_info.verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * + align(fetch_info->count, 4)); + if (!fetched_vert_info.verts) { assert(0); return; } - /* Fetch into our vertex buffer + /* Fetch into our vertex buffer. + */ + fetch( fpme->fetch, fetch_info, (char *)fetched_vert_info.verts ); + + /* Finished with fetch: */ - draw_pt_fetch_run_linear( fpme->fetch, - start, - count, - (char *)pipeline_verts ); + fetch_info = NULL; + vert_info = &fetched_vert_info; /* Run the shader, note that this overwrites the data[] parts of * the pipeline verts. */ - if (opt & PT_SHADE) - { - shader->run_linear(shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - - if (geometry_shader) { - count = - draw_geometry_shader_run(geometry_shader, - fpme->input_prim, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - debug_assert(count <= alloc_count); - } + if (fpme->opt & PT_SHADE) { + draw_vertex_shader_run(vshader, + draw->pt.user.vs_constants, + vert_info, + &vs_vert_info); + + FREE(vert_info->verts); + vert_info = &vs_vert_info; + } + + if ((fpme->opt & PT_SHADE) && gshader) { + draw_geometry_shader_run(gshader, + draw->pt.user.gs_constants, + vert_info, + prim_info, + &gs_vert_info, + &gs_prim_info); + + FREE(vert_info->verts); + vert_info = &gs_vert_info; + prim_info = &gs_prim_info; } - /* stream output needs to be done before clipping */ + + /* Stream output needs to be done before clipping. + * + * XXX: Stream output surely needs to respect the prim_info->elt + * lists. 
+ */ draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size ); + vert_info, + prim_info ); if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) + vert_info )) { opt |= PT_PIPELINE; } @@ -301,115 +275,96 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, /* Do we need to run the pipeline? */ if (opt & PT_PIPELINE) { - draw_pipeline_run_linear( fpme->draw, - fpme->output_prim, - pipeline_verts, - count, - fpme->vertex_size); + pipeline( fpme, + vert_info, + prim_info ); } else { - draw_pt_emit_linear( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fpme->vertex_size, - count ); + emit( fpme->emit, + vert_info, + prim_info ); } - - FREE(pipeline_verts); + FREE(vert_info->verts); } +static void fetch_pipeline_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = FALSE; + fetch_info.start = 0; + fetch_info.elts = fetch_elts; + fetch_info.count = fetch_count; + + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; + + fetch_pipeline_generic( middle, &fetch_info, &prim_info ); +} -static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, - unsigned start, - unsigned count, - const ushort *draw_elts, - unsigned draw_count ) +static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - struct draw_vertex_shader 
*shader = draw->vs.vertex_shader; - struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; - unsigned opt = fpme->opt; - struct vertex_header *pipeline_verts; - unsigned alloc_count = draw_max_output_vertices(draw, - fpme->input_prim, - count); - - pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; + + prim_info.linear = TRUE; + prim_info.start = 0; + prim_info.count = count; + prim_info.elts = NULL; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &count; + + fetch_pipeline_generic( middle, &fetch_info, &prim_info ); +} - if (!pipeline_verts) - return FALSE; - /* Fetch into our vertex buffer - */ - draw_pt_fetch_run_linear( fpme->fetch, - start, - count, - (char *)pipeline_verts ); - /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. 
- */ - if (opt & PT_SHADE) - { - shader->run_linear(shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - - if (geometry_shader) { - count = - draw_geometry_shader_run(geometry_shader, - fpme->input_prim, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - debug_assert(count <= alloc_count); - } - } +static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; - /* stream output needs to be done before clipping */ - draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size ); + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; - /* Do we need to run the pipeline? 
- */ - if (opt & PT_PIPELINE) { - draw_pipeline_run( fpme->draw, - fpme->output_prim, - pipeline_verts, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } - else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } + fetch_pipeline_generic( middle, &fetch_info, &prim_info ); - FREE(pipeline_verts); return TRUE; } @@ -464,7 +419,7 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context * goto fail; fpme->emit = draw_pt_emit_create( draw ); - if (!fpme->emit) + if (!fpme->emit) goto fail; fpme->so_emit = draw_pt_so_emit_create( draw ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 5f6d23874f..c7f76397e7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -28,6 +28,7 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "draw/draw_context.h" +#include "draw/draw_gs.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" @@ -48,7 +49,6 @@ struct llvm_middle_end { unsigned vertex_data_offset; unsigned vertex_size; unsigned input_prim; - unsigned output_prim; unsigned opt; struct draw_llvm *llvm; @@ -61,7 +61,6 @@ struct llvm_middle_end { static void llvm_middle_end_prepare( struct draw_pt_middle_end *middle, unsigned in_prim, - unsigned out_prim, unsigned opt, unsigned *max_vertices ) { @@ -73,6 +72,11 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned instance_id_index = ~0; + + unsigned out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + in_prim); + /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. 
*/ @@ -89,7 +93,6 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, } fpme->input_prim = in_prim; - fpme->output_prim = out_prim; fpme->opt = opt; /* Always leave room for the vertex header whether we need it or @@ -106,9 +109,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, (boolean)draw->bypass_clipping, (boolean)(draw->identity_viewport), (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? true : false) ); + (draw->vs.edgeflag_output ? TRUE : FALSE) ); + + draw_pt_so_emit_prepare( fpme->so_emit ); - draw_pt_so_emit_prepare( fpme->so_emit, out_prim ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, out_prim, @@ -150,72 +154,142 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, } +static void pipeline(struct llvm_middle_end *llvm, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) + draw_pipeline_run_linear( llvm->draw, + vert_info, + prim_info); + else + draw_pipeline_run( llvm->draw, + vert_info, + prim_info ); +} -static void llvm_middle_end_run( struct draw_pt_middle_end *middle, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) +static void emit(struct pt_emit *emit, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) { + draw_pt_emit_linear(emit, vert_info, prim_info); + } + else { + draw_pt_emit(emit, vert_info, prim_info); + } +} + +static void +llvm_pipeline_generic( struct draw_pt_middle_end *middle, + const struct draw_fetch_info *fetch_info, + const struct draw_prim_info *prim_info ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; + struct draw_prim_info gs_prim_info; + struct draw_vertex_info llvm_vert_info; + struct draw_vertex_info gs_vert_info; + struct 
draw_vertex_info *vert_info; unsigned opt = fpme->opt; - unsigned alloc_count = align( fetch_count, 4 ); - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. - */ + llvm_vert_info.count = fetch_info->count; + llvm_vert_info.vertex_size = fpme->vertex_size; + llvm_vert_info.stride = fpme->vertex_size; + llvm_vert_info.verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * + align(fetch_info->count, 4)); + if (!llvm_vert_info.verts) { assert(0); return; } - fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, - pipeline_verts, - (const char **)draw->pt.user.vbuffer, - fetch_elts, - fetch_count, - fpme->vertex_size, - draw->pt.vertex_buffer ); + if (fetch_info->linear) + fpme->current_variant->jit_func( &fpme->llvm->jit_context, + llvm_vert_info.verts, + (const char **)draw->pt.user.vbuffer, + fetch_info->start, + fetch_info->count, + fpme->vertex_size, + draw->pt.vertex_buffer ); + else + fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, + llvm_vert_info.verts, + (const char **)draw->pt.user.vbuffer, + fetch_info->elts, + fetch_info->count, + fpme->vertex_size, + draw->pt.vertex_buffer); + + /* Finished with fetch and vs: + */ + fetch_info = NULL; + vert_info = &llvm_vert_info; + + + if ((opt & PT_SHADE) && gshader) { + draw_geometry_shader_run(gshader, + draw->pt.user.gs_constants, + vert_info, + prim_info, + &gs_vert_info, + &gs_prim_info); + + FREE(vert_info->verts); + vert_info = &gs_vert_info; + prim_info = &gs_prim_info; + } /* stream output needs to be done before clipping */ draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - fetch_count, - fpme->vertex_size ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - fetch_count, - fpme->vertex_size )) - { + vert_info, + prim_info ); + + if (draw_pt_post_vs_run( fpme->post_vs, vert_info )) { opt |= PT_PIPELINE; 
} /* Do we need to run the pipeline? */ if (opt & PT_PIPELINE) { - draw_pipeline_run( fpme->draw, - fpme->output_prim, - pipeline_verts, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + pipeline( fpme, + vert_info, + prim_info ); } else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + emit( fpme->emit, + vert_info, + prim_info ); } + FREE(vert_info->verts); +} - FREE(pipeline_verts); +static void llvm_middle_end_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = FALSE; + fetch_info.start = 0; + fetch_info.elts = fetch_elts; + fetch_info.count = fetch_count; + + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; + + llvm_pipeline_generic( middle, &fetch_info, &prim_info ); } @@ -224,63 +298,23 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, unsigned count) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; - struct draw_context *draw = fpme->draw; - unsigned opt = fpme->opt; - unsigned alloc_count = align( count, 4 ); - - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. 
- */ - assert(0); - return; - } - -#if 0 - debug_printf("#### Pipeline = %p (data = %p)\n", - pipeline_verts, pipeline_verts->data); -#endif - fpme->current_variant->jit_func( &fpme->llvm->jit_context, - pipeline_verts, - (const char **)draw->pt.user.vbuffer, - start, - count, - fpme->vertex_size, - draw->pt.vertex_buffer ); - - /* stream output needs to be done before clipping */ - draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } - - /* Do we need to run the pipeline? - */ - if (opt & PT_PIPELINE) { - draw_pipeline_run_linear( fpme->draw, - fpme->output_prim, - pipeline_verts, - count, - fpme->vertex_size); - } - else { - draw_pt_emit_linear( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fpme->vertex_size, - count ); - } - - FREE(pipeline_verts); + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; + + prim_info.linear = TRUE; + prim_info.start = 0; + prim_info.count = count; + prim_info.elts = NULL; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &count; + + llvm_pipeline_generic( middle, &fetch_info, &prim_info ); } @@ -293,59 +327,24 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, unsigned draw_count ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; - struct draw_context *draw = fpme->draw; - unsigned opt = fpme->opt; - unsigned alloc_count = align( count, 4 ); + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; - 
if (!pipeline_verts) - return FALSE; - - fpme->current_variant->jit_func( &fpme->llvm->jit_context, - pipeline_verts, - (const char **)draw->pt.user.vbuffer, - start, - count, - fpme->vertex_size, - draw->pt.vertex_buffer ); - - /* stream output needs to be done before clipping */ - draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; - /* Do we need to run the pipeline? - */ - if (opt & PT_PIPELINE) { - draw_pipeline_run( fpme->draw, - fpme->output_prim, - pipeline_verts, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } - else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } + llvm_pipeline_generic( middle, &fetch_info, &prim_info ); - FREE(pipeline_verts); return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index fd33a548b4..112be50f9a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -29,16 +29,13 @@ #include "pipe/p_context.h" #include "draw/draw_context.h" #include "draw/draw_private.h" -#include "draw/draw_vbuf.h" #include "draw/draw_pt.h" struct pt_post_vs { struct draw_context *draw; boolean (*run)( struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ); + struct draw_vertex_info *info ); }; @@ -92,20 +89,18 @@ compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) * instructions */ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, - struct 
vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { - struct vertex_header *out = vertices; + struct vertex_header *out = info->verts; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned clipped = 0; unsigned j; - if (0) debug_printf("%s count, %d\n", __FUNCTION__, count); + if (0) debug_printf("%s count, %d\n", __FUNCTION__, info->count); - for (j = 0; j < count; j++) { + for (j = 0; j < info->count; j++) { float *position = out->data[pos]; #if 0 @@ -143,7 +138,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, #endif } - out = (struct vertex_header *)( (char *)out + stride ); + out = (struct vertex_header *)( (char *)out + info->stride ); } return clipped != 0; @@ -153,29 +148,27 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, /* As above plus edgeflags */ -static boolean +static boolean post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info) { unsigned j; boolean needpipe; - needpipe = post_vs_cliptest_viewport_gl( pvs, vertices, count, stride); + needpipe = post_vs_cliptest_viewport_gl(pvs, info); /* If present, copy edgeflag VS output into vertex header. * Otherwise, leave header as is. 
*/ if (pvs->draw->vs.edgeflag_output) { - struct vertex_header *out = vertices; + struct vertex_header *out = info->verts; int ef = pvs->draw->vs.edgeflag_output; - for (j = 0; j < count; j++) { + for (j = 0; j < info->count; j++) { const float *edgeflag = out->data[ef]; out->edgeflag = !(edgeflag[0] != 1.0f); needpipe |= !out->edgeflag; - out = (struct vertex_header *)( (char *)out + stride ); + out = (struct vertex_header *)( (char *)out + info->stride ); } } return needpipe; @@ -187,18 +180,16 @@ post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs, /* If bypass_clipping is set, skip cliptest and rhw divide. */ static boolean post_vs_viewport( struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { - struct vertex_header *out = vertices; + struct vertex_header *out = info->verts; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned j; if (0) debug_printf("%s\n", __FUNCTION__); - for (j = 0; j < count; j++) { + for (j = 0; j < info->count; j++) { float *position = out->data[pos]; /* Viewport mapping only, no cliptest/rhw divide @@ -207,9 +198,9 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, position[1] = position[1] * scale[1] + trans[1]; position[2] = position[2] * scale[2] + trans[2]; - out = (struct vertex_header *)((char *)out + stride); + out = (struct vertex_header *)((char *)out + info->stride); } - + return FALSE; } @@ -218,20 +209,16 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, * to do. 
*/ static boolean post_vs_none( struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { if (0) debug_printf("%s\n", __FUNCTION__); return FALSE; } boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, - struct vertex_header *pipeline_verts, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { - return pvs->run( pvs, pipeline_verts, count, stride ); + return pvs->run( pvs, info ); } @@ -272,7 +259,7 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ) return NULL; pvs->draw = draw; - + return pvs; } diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index bb153cedfa..5d82934889 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -25,151 +25,264 @@ * **************************************************************************/ -#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" -#include "translate/translate.h" -#include "translate/translate_cache.h" + +#include "util/u_math.h" +#include "util/u_memory.h" struct pt_so_emit { struct draw_context *draw; - struct translate *translate; + void *buffers[PIPE_MAX_SO_BUFFERS]; - struct translate_cache *cache; - unsigned prim; + unsigned input_vertex_stride; + const float (*inputs)[4]; - const struct vertex_info *vinfo; boolean has_so; + + boolean single_buffer; + + unsigned emitted_primitives; + unsigned emitted_vertices; }; -static void -prepare_so_emit( struct pt_so_emit *emit, - const struct vertex_info *vinfo ) + +void draw_pt_so_emit_prepare(struct pt_so_emit *emit) { struct draw_context *draw = emit->draw; - unsigned i; - struct translate_key hw_key; - unsigned dst_offset = 0; + emit->has_so = (draw->so.state.num_outputs > 0); + + /* if we have a state with outputs make sure 
we have + * buffers to output to */ if (emit->has_so) { - for (i = 0; i < draw->so.state.num_outputs; ++i) { - unsigned src_offset = (draw->so.state.register_index[i] * 4 * - sizeof(float) ); - unsigned output_format; - unsigned emit_sz = 0; - /*unsigned output_bytes = util_format_get_blocksize(output_format); - unsigned nr_compo = util_format_get_nr_components(output_format);*/ - - output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit); - emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit); - - /* doesn't handle EMIT_OMIT */ - assert(emit_sz != 0); - - if (draw->so.state.register_mask[i] != TGSI_WRITEMASK_XYZW) { - /* we only support rendering with XYZW writemask*/ - debug_printf("NOT_IMPLEMENTED(writemask with stream output) at %s: %s:%d\n", - __FUNCTION__, __FILE__, __LINE__); + boolean has_valid_buffer = FALSE; + unsigned i; + for (i = 0; i < draw->so.num_buffers; ++i) { + if (draw->so.buffers[i]) { + has_valid_buffer = TRUE; + break; } + } + emit->has_so = has_valid_buffer; + } + + if (!emit->has_so) + return; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? 
+ */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); +} - hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; - hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - hw_key.element[i].input_buffer = 0; - hw_key.element[i].input_offset = src_offset; - hw_key.element[i].instance_divisor = 0; - hw_key.element[i].output_format = output_format; - hw_key.element[i].output_offset = dst_offset; +static boolean +is_component_writable(unsigned mask, + unsigned compo) +{ + switch (mask) { + case TGSI_WRITEMASK_NONE: + return FALSE; + case TGSI_WRITEMASK_X: + return compo == 0; + case TGSI_WRITEMASK_Y: + return compo == 1; + case TGSI_WRITEMASK_XY: + return compo == 0 || compo == 1; + case TGSI_WRITEMASK_Z: + return compo == 2; + case TGSI_WRITEMASK_XZ: + return compo == 0 || compo == 2; + case TGSI_WRITEMASK_YZ: + return compo == 1 || compo == 2; + case TGSI_WRITEMASK_XYZ: + return compo == 0 || compo == 1 || compo == 2; + case TGSI_WRITEMASK_W: + return compo == 3; + case TGSI_WRITEMASK_XW: + return compo == 0 || compo == 3; + case TGSI_WRITEMASK_YW: + return compo == 1 || compo == 3; + case TGSI_WRITEMASK_XYW: + return compo == 0 || compo == 1 || compo == 3; + case TGSI_WRITEMASK_ZW: + return compo == 2 || compo == 3; + case TGSI_WRITEMASK_XZW: + return compo == 0 || compo == 2 || compo == 3; + case TGSI_WRITEMASK_YZW: + return compo == 1 || compo == 2 || compo == 3; + case TGSI_WRITEMASK_XYZW: + return compo < 4; + default: + debug_assert(!"Unknown writemask in stream out"); + return compo < 4; + } +} - dst_offset += emit_sz; +static void so_emit_prim(struct pt_so_emit *so, + unsigned *indices, + unsigned num_vertices) +{ + unsigned slot, i; + unsigned input_vertex_stride = so->input_vertex_stride; + struct draw_context *draw = so->draw; + const float (*input_ptr)[4]; + const struct pipe_stream_output_state *state = + &draw->so.state; + float **buffer = 0; + + input_ptr = so->inputs; + + for (i = 0; i < num_vertices; ++i) { + const float (*input)[4]; + unsigned 
total_written_compos = 0; + /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], prim_idx);*/ + input = (const float (*)[4])( + (const char *)input_ptr + (indices[i] * input_vertex_stride)); + for (slot = 0; slot < state->num_outputs; ++slot) { + unsigned idx = state->register_index[slot]; + unsigned writemask = state->register_mask[slot]; + unsigned written_compos = 0; + unsigned compo; + + buffer = (float**)&so->buffers[state->output_buffer[slot]]; + + /*debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", + slot, vs_slot, idx);*/ +#if 1 + assert(!util_is_inf_or_nan(input[idx][0])); + assert(!util_is_inf_or_nan(input[idx][1])); + assert(!util_is_inf_or_nan(input[idx][2])); + assert(!util_is_inf_or_nan(input[idx][3])); +#endif + for (compo = 0; compo < 4; ++compo) { + if (is_component_writable(writemask, compo)) { + float *buf = *buffer; + buf[written_compos++] = input[idx][compo]; + } + } +#if 0 + debug_printf("\t\t(writemask = %d)%f %f %f %f\n", + writemask, + input[idx][0], + input[idx][1], + input[idx][2], + input[idx][3]); +#endif + *buffer += written_compos; + total_written_compos += written_compos; } - hw_key.nr_elements = draw->so.state.num_outputs; - hw_key.output_stride = draw->so.state.stride; - - if (!emit->translate || - translate_key_compare(&emit->translate->key, &hw_key) != 0) - { - translate_key_sanitize(&hw_key); - emit->translate = translate_cache_find(emit->cache, &hw_key); + if (so->single_buffer) { + int stride = (int)state->stride - + sizeof(float) * total_written_compos; + + debug_assert(stride >= 0); + *buffer = (float*) (((char*)*buffer) + stride); } - } else { - /* no stream output */ - emit->translate = NULL; } + so->emitted_vertices += num_vertices; + ++so->emitted_primitives; } - -void draw_pt_so_emit_prepare( struct pt_so_emit *emit, - unsigned prim ) +static void so_point(struct pt_so_emit *so, int idx) { - struct draw_context *draw = emit->draw; - boolean ok; + unsigned indices[1]; - emit->has_so = 
(draw->so.state.num_outputs > 0); + indices[0] = idx; - if (!emit->has_so) - return; + so_emit_prim(so, indices, 1); +} - /* XXX: need to flush to get prim_vbuf.c to release its allocation?? - */ - draw_do_flush( draw, DRAW_FLUSH_BACKEND ); +static void so_line(struct pt_so_emit *so, int i0, int i1) +{ + unsigned indices[2]; - emit->prim = prim; + indices[0] = i0; + indices[1] = i1; - ok = draw->render->set_primitive(draw->render, emit->prim); - if (!ok) { - assert(0); - return; - } + so_emit_prim(so, indices, 2); +} + +static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2) +{ + unsigned indices[3]; - /* Must do this after set_primitive() above: */ - emit->vinfo = draw->render->get_vertex_info(draw->render); + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; - prepare_so_emit( emit, emit->vinfo ); + so_emit_prim(so, indices, 3); } +#define TRIANGLE(gs,i0,i1,i2) so_tri(so,i0,i1,i2) +#define LINE(gs,i0,i1) so_line(so,i0,i1) +#define POINT(gs,i0) so_point(so,i0) +#define FUNC so_run_linear +#define LOCAL_VARS +#include "draw_so_emit_tmp.h" +#undef LOCAL_VARS +#undef FUNC + + +#define TRIANGLE(gs,i0,i1,i2) so_tri(gs,elts[i0],elts[i1],elts[i2]) +#define LINE(gs,i0,i1) so_line(gs,elts[i0],elts[i1]) +#define POINT(gs,i0) so_point(gs,elts[i0]) +#define FUNC so_run_elts +#define LOCAL_VARS \ + const ushort *elts = input_prims->elts; +#include "draw_so_emit_tmp.h" +#undef LOCAL_VARS +#undef FUNC + + void draw_pt_so_emit( struct pt_so_emit *emit, - const float (*vertex_data)[4], - unsigned vertex_count, - unsigned stride ) + const struct draw_vertex_info *input_verts, + const struct draw_prim_info *input_prims ) { struct draw_context *draw = emit->draw; - struct translate *translate = emit->translate; struct vbuf_render *render = draw->render; - void *so_buffer; + unsigned start, i; if (!emit->has_so) return; - so_buffer = draw->so.buffers[0]; + emit->emitted_vertices = 0; + emit->emitted_primitives = 0; + emit->input_vertex_stride = input_verts->stride; + 
emit->inputs = (const float (*)[4])input_verts->verts->data; + for (i = 0; i < draw->so.num_buffers; ++i) { + emit->buffers[i] = draw->so.buffers[i]; + } + emit->single_buffer = TRUE; + for (i = 0; i < draw->so.state.num_outputs; ++i) { + if (draw->so.state.output_buffer[i] != 0) + emit->single_buffer = FALSE; + } /* XXX: need to flush to get prim_vbuf.c to release its allocation??*/ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (vertex_count == 0) - return; - - if (vertex_count >= UNDEFINED_VERTEX_ID) { - assert(0); - return; - } - - /* XXX we only support single output buffer */ - if (draw->so.num_buffers != 1) { - debug_printf("NOT_IMPLEMENTED(multiple stream output buffers) at %s: %s:%d\n", - __FUNCTION__, __FILE__, __LINE__); + for (start = i = 0; i < input_prims->primitive_count; + start += input_prims->primitive_lengths[i], i++) + { + unsigned count = input_prims->primitive_lengths[i]; + + if (input_prims->linear) { + so_run_linear(emit, input_prims, input_verts, + start, count); + } else { + so_run_elts(emit, input_prims, input_verts, + start, count); + } } - translate->set_buffer(translate, 0, vertex_data, - stride, ~0); - translate->run(translate, 0, vertex_count, - draw->instance_id, so_buffer); - - render->set_stream_output_info(render, 0, vertex_count); + render->set_stream_output_info(render, + emit->emitted_primitives, + emit->emitted_vertices); } @@ -180,19 +293,11 @@ struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw ) return NULL; emit->draw = draw; - emit->cache = translate_cache_create(); - if (!emit->cache) { - FREE(emit); - return NULL; - } return emit; } void draw_pt_so_emit_destroy( struct pt_so_emit *emit ) { - if (emit->cache) - translate_cache_destroy(emit->cache); - FREE(emit); } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 5ea833032f..d89d5cd20f 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ 
-137,7 +137,6 @@ static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { static void varray_prepare(struct draw_pt_front_end *frontend, unsigned in_prim, - unsigned out_prim, struct draw_pt_middle_end *middle, unsigned opt) { @@ -146,11 +145,12 @@ static void varray_prepare(struct draw_pt_front_end *frontend, varray->base.run = varray_run; varray->input_prim = in_prim; - varray->output_prim = decompose_prim[out_prim]; + varray->output_prim = decompose_prim[in_prim]; varray->middle = middle; - middle->prepare(middle, varray->input_prim, - varray->output_prim, opt, &varray->driver_fetch_max ); + middle->prepare(middle, + varray->output_prim, + opt, &varray->driver_fetch_max ); /* check that the max is even */ assert((varray->driver_fetch_max & 1) == 0); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 914c87a9dc..b7e0da7d44 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -70,7 +70,6 @@ vcache_flush( struct vcache_frontend *vcache ) if (vcache->middle_prim != vcache->output_prim) { vcache->middle_prim = vcache->output_prim; vcache->middle->prepare( vcache->middle, - vcache->input_prim, vcache->middle_prim, vcache->opt, &vcache->fetch_max ); @@ -368,7 +367,6 @@ vcache_check_run( struct draw_pt_front_end *frontend, if (vcache->middle_prim != vcache->input_prim) { vcache->middle_prim = vcache->input_prim; vcache->middle->prepare( vcache->middle, - vcache->input_prim, vcache->middle_prim, vcache->opt, &vcache->fetch_max ); @@ -472,7 +470,6 @@ vcache_check_run( struct draw_pt_front_end *frontend, static void vcache_prepare( struct draw_pt_front_end *frontend, unsigned in_prim, - unsigned out_prim, struct draw_pt_middle_end *middle, unsigned opt ) { @@ -487,8 +484,14 @@ vcache_prepare( struct draw_pt_front_end *frontend, vcache->base.run = vcache_check_run; } + /* VCache will always emit the reduced version of its input + * primitive, ie STRIP/FANS 
become TRIS, etc. + * + * This is not to be confused with what the GS might be up to, + * which is a separate issue. + */ vcache->input_prim = in_prim; - vcache->output_prim = u_reduced_prim(out_prim); + vcache->output_prim = u_reduced_prim(in_prim); vcache->middle = middle; vcache->opt = opt; @@ -497,8 +500,9 @@ vcache_prepare( struct draw_pt_front_end *frontend, * doing so: */ vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim; - middle->prepare( middle, vcache->input_prim, - vcache->middle_prim, opt, &vcache->fetch_max ); + middle->prepare( middle, + vcache->middle_prim, + opt, &vcache->fetch_max ); } diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h new file mode 100644 index 0000000000..01212a8e53 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h @@ -0,0 +1,123 @@ + +static void FUNC( struct pt_so_emit *so, + const struct draw_prim_info *input_prims, + const struct draw_vertex_info *input_verts, + unsigned start, + unsigned count) +{ + struct draw_context *draw = so->draw; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i; + LOCAL_VARS + + if (0) debug_printf("%s %d\n", __FUNCTION__, count); + + debug_assert(input_prims->primitive_count == 1); + + switch (input_prims->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) { + POINT( so, start + i + 0 ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + LINE( so , start + i + 0 , start + i + 1 ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + + for (i = 1; i < count; i++) { + LINE( so, start + i - 1, start + i ); + } + + LINE( so, start + i - 1, start ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < count; i++) { + LINE( so, start + i - 1, start + i ); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + TRIANGLE( so, start + i + 0, start + i + 1, start 
+ i + 2 ); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start + i + 0, + start + i + 1 + (i&1), + start + i + 2 - (i&1) ); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start + i + 0 + (i&1), + start + i + 1 - (i&1), + start + i + 2 ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start + i + 1, + start + i + 2, + start ); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start, + start + i + 1, + start + i + 2 ); + } + } + } + break; + + case PIPE_PRIM_POLYGON: + { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + + for (i = 0; i+2 < count; i++) { + + if (flatfirst) { + TRIANGLE( so, start + 0, start + i + 1, start + i + 2 ); + } + else { + TRIANGLE( so, start + i + 1, start + i + 2, start + 0 ); + } + } + } + break; + + default: + debug_assert(!"Unsupported primitive in stream output"); + break; + } +} + + +#undef TRIANGLE +#undef POINT +#undef LINE +#undef FUNC diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 8d9768246e..e32803c072 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -123,7 +123,7 @@ struct vbuf_render { * Called after writing data to the stream out buffers */ void (*set_stream_output_info)( struct vbuf_render *vbufr, - unsigned buffer_index, + unsigned primitive_count, unsigned vertices_count ); }; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index eb49204238..87e3e72a6e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -310,21 +310,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, } -typedef void (*fetch_func)(float *, const 
uint8_t *, unsigned, unsigned); - -/** cast wrapper */ -static void * -fetch_func_ptr_to_voidptr(fetch_func f) -{ - union { - void *v; - fetch_func f; - } u; - u.f = f; - return u.v; -} - - /** * Fetch a pixel into a 4 float AoS. * @@ -406,7 +391,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, assert(LLVMIsDeclaration(function)); LLVMAddGlobalMapping(lp_build_engine, function, - fetch_func_ptr_to_voidptr(format_desc->fetch_rgba_float)); + func_to_pointer((func_pointer)format_desc->fetch_rgba_float)); } tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 0a690ea747..44cfdc4d3f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -78,6 +78,9 @@ enum LLVM_CodeGenOpt_Level { extern void lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE); +extern void +lp_set_target_options(void); + void lp_build_init(void) @@ -86,6 +89,8 @@ lp_build_init(void) gallivm_debug = debug_get_flags_option("GALLIVM_DEBUG", lp_bld_debug_flags, 0 ); #endif + lp_set_target_options(); + LLVMInitializeNativeTarget(); LLVMLinkInJIT(); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index f004c0ae45..5a9488b5f7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -36,6 +36,7 @@ #include <llvm-c/Core.h> #include <llvm-c/ExecutionEngine.h> +#include <llvm/Target/TargetOptions.h> #include <llvm/ExecutionEngine/ExecutionEngine.h> #include <llvm/ExecutionEngine/JITEventListener.h> @@ -119,3 +120,25 @@ lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE) { llvm::unwrap(EE)->RegisterJITEventListener(llvm::createOProfileJITEventListener()); } + + +extern "C" void +lp_set_target_options(void) +{ +#if defined(DEBUG) +#if HAVE_LLVM >= 0x0207 + llvm::JITEmitDebugInfo = true; 
+#endif +#endif + +#if defined(DEBUG) || defined(PROFILE) + llvm::NoFramePointerElim = true; +#endif + + llvm::NoExcessFPPrecision = false; + + /* XXX: Investigate this */ +#if 0 + llvm::UnsafeFPMath = true; +#endif +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 0890078cd0..6dbedf15ca 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -164,6 +164,7 @@ tgsi_default_full_declaration( void ) full_declaration.Declaration = tgsi_default_declaration(); full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); + full_declaration.ImmediateData.u = NULL; return full_declaration; } @@ -180,7 +181,7 @@ tgsi_build_full_declaration( struct tgsi_declaration_range *dr; if( maxsize <= size ) - return 0; + return 0; declaration = (struct tgsi_declaration *) &tokens[size]; size++; @@ -235,6 +236,24 @@ tgsi_build_full_declaration( header ); } + if (full_decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned i, j; + union tgsi_immediate_data *data; + + for (i = 0; i <= dr->Last; ++i) { + for (j = 0; j < 4; ++j) { + unsigned idx = i*4 + j; + if (maxsize <= size) + return 0; + data = (union tgsi_immediate_data *) &tokens[size]; + ++size; + + *data = full_decl->ImmediateData.u[idx]; + declaration_grow( declaration, header ); + } + } + } + return size; } @@ -613,6 +632,7 @@ tgsi_build_full_instruction( reg->Register.File, reg->Register.WriteMask, reg->Register.Indirect, + reg->Register.Dimension, reg->Register.Index, instruction, header ); @@ -640,6 +660,46 @@ tgsi_build_full_instruction( instruction, header ); } + + if( reg->Register.Dimension ) { + struct tgsi_dimension *dim; + + assert( !reg->Dimension.Dimension ); + + if( maxsize <= size ) + return 0; + dim = (struct tgsi_dimension *) &tokens[size]; + size++; + + *dim = tgsi_build_dimension( + reg->Dimension.Indirect, + reg->Dimension.Index, + 
instruction, + header ); + + if( reg->Dimension.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->DimIndirect.File, + reg->DimIndirect.SwizzleX, + reg->DimIndirect.SwizzleY, + reg->DimIndirect.SwizzleZ, + reg->DimIndirect.SwizzleW, + reg->DimIndirect.Negate, + reg->DimIndirect.Absolute, + reg->DimIndirect.Indirect, + reg->DimIndirect.Dimension, + reg->DimIndirect.Index, + instruction, + header ); + } + } } for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { @@ -959,6 +1019,7 @@ tgsi_build_dst_register( unsigned file, unsigned mask, unsigned indirect, + unsigned dimension, int index, struct tgsi_instruction *instruction, struct tgsi_header *header ) @@ -974,6 +1035,7 @@ tgsi_build_dst_register( dst_register.WriteMask = mask; dst_register.Index = index; dst_register.Indirect = indirect; + dst_register.Dimension = dimension; instruction_grow( instruction, header ); @@ -987,6 +1049,8 @@ tgsi_default_full_dst_register( void ) full_dst_register.Register = tgsi_default_dst_register(); full_dst_register.Indirect = tgsi_default_src_register(); + full_dst_register.Dimension = tgsi_default_dimension(); + full_dst_register.DimIndirect = tgsi_default_src_register(); return full_dst_register; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 13d7f5272d..112107a088 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -263,6 +263,7 @@ tgsi_build_dst_register( unsigned file, unsigned mask, unsigned indirect, + unsigned dimension, int index, struct tgsi_instruction *instruction, struct tgsi_header *header ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 35480076ed..9fcc28f4c9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -101,7 +101,9 
@@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "PRED", - "SV" + "SV", + "IMMX", + "TEMPX" }; static const char *interpolate_names[] = @@ -191,29 +193,30 @@ static const char *fs_coord_pixel_center_names[] = static void -_dump_register_dst( - struct dump_ctx *ctx, - uint file, - int index) -{ - ENM( file, file_names ); - - CHR( '[' ); - SID( index ); - CHR( ']' ); -} - - -static void _dump_register_src( struct dump_ctx *ctx, const struct tgsi_full_src_register *src ) { ENM(src->Register.File, file_names); if (src->Register.Dimension) { - CHR('['); - SID(src->Dimension.Index); - CHR(']'); + if (src->Dimension.Indirect) { + CHR( '[' ); + ENM( src->DimIndirect.File, file_names ); + CHR( '[' ); + SID( src->DimIndirect.Index ); + TXT( "]." ); + ENM( src->DimIndirect.SwizzleX, swizzle_names ); + if (src->Dimension.Index != 0) { + if (src->Dimension.Index > 0) + CHR( '+' ); + SID( src->Dimension.Index ); + } + CHR( ']' ); + } else { + CHR('['); + SID(src->Dimension.Index); + CHR(']'); + } } if (src->Register.Indirect) { CHR( '[' ); @@ -235,30 +238,52 @@ _dump_register_src( } } + static void -_dump_register_ind( +_dump_register_dst( struct dump_ctx *ctx, - uint file, - int index, - uint ind_file, - int ind_index, - uint ind_swizzle ) + const struct tgsi_full_dst_register *dst ) { - ENM( file, file_names ); - CHR( '[' ); - ENM( ind_file, file_names ); - CHR( '[' ); - SID( ind_index ); - TXT( "]." ); - ENM( ind_swizzle, swizzle_names ); - if (index != 0) { - if (index > 0) - CHR( '+' ); - SID( index ); + ENM(dst->Register.File, file_names); + if (dst->Register.Dimension) { + if (dst->Dimension.Indirect) { + CHR( '[' ); + ENM( dst->DimIndirect.File, file_names ); + CHR( '[' ); + SID( dst->DimIndirect.Index ); + TXT( "]." 
); + ENM( dst->DimIndirect.SwizzleX, swizzle_names ); + if (dst->Dimension.Index != 0) { + if (dst->Dimension.Index > 0) + CHR( '+' ); + SID( dst->Dimension.Index ); + } + CHR( ']' ); + } else { + CHR('['); + SID(dst->Dimension.Index); + CHR(']'); + } + } + if (dst->Register.Indirect) { + CHR( '[' ); + ENM( dst->Indirect.File, file_names ); + CHR( '[' ); + SID( dst->Indirect.Index ); + TXT( "]." ); + ENM( dst->Indirect.SwizzleX, swizzle_names ); + if (dst->Register.Index != 0) { + if (dst->Register.Index > 0) + CHR( '+' ); + SID( dst->Register.Index ); + } + CHR( ']' ); + } else { + CHR( '[' ); + SID( dst->Register.Index ); + CHR( ']' ); } - CHR( ']' ); } - static void _dump_writemask( struct dump_ctx *ctx, @@ -277,6 +302,39 @@ _dump_writemask( } } +static void +dump_imm_data(struct tgsi_iterate_context *iter, + union tgsi_immediate_data *data, + unsigned num_tokens, + unsigned data_type) +{ + struct dump_ctx *ctx = (struct dump_ctx *)iter; + unsigned i ; + + TXT( " {" ); + + assert( num_tokens <= 4 ); + for (i = 0; i < num_tokens; i++) { + switch (data_type) { + case TGSI_IMM_FLOAT32: + FLT( data[i].Float ); + break; + case TGSI_IMM_UINT32: + UID(data[i].Uint); + break; + case TGSI_IMM_INT32: + SID(data[i].Int); + break; + default: + assert( 0 ); + } + + if (i < num_tokens - 1) + TXT( ", " ); + } + TXT( "}" ); +} + static boolean iter_declaration( struct tgsi_iterate_context *iter, @@ -357,6 +415,43 @@ iter_declaration( } } + if (decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned i; + char range_indent[4]; + + TXT(" {"); + + if (decl->Range.Last < 10) + range_indent[0] = '\0'; + else if (decl->Range.Last < 100) { + range_indent[0] = ' '; + range_indent[1] = '\0'; + } else if (decl->Range.Last < 1000) { + range_indent[0] = ' '; + range_indent[1] = ' '; + range_indent[2] = '\0'; + } else { + range_indent[0] = ' '; + range_indent[1] = ' '; + range_indent[2] = ' '; + range_indent[3] = '\0'; + } + + dump_imm_data(iter, decl->ImmediateData.u, + 4, 
TGSI_IMM_FLOAT32); + for(i = 1; i <= decl->Range.Last; ++i) { + /* indent by strlen of: + * "DCL IMMX[0..1] {" */ + CHR('\n'); + TXT( " " ); + TXT( range_indent ); + dump_imm_data(iter, decl->ImmediateData.u + i, + 4, TGSI_IMM_FLOAT32); + } + + TXT(" }"); + } + EOL(); return TRUE; @@ -430,33 +525,11 @@ iter_immediate( { struct dump_ctx *ctx = (struct dump_ctx *) iter; - uint i; - TXT( "IMM " ); ENM( imm->Immediate.DataType, immediate_type_names ); - TXT( " { " ); - - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (i = 0; i < imm->Immediate.NrTokens - 1; i++) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - FLT( imm->u[i].Float ); - break; - case TGSI_IMM_UINT32: - UID(imm->u[i].Uint); - break; - case TGSI_IMM_INT32: - SID(imm->u[i].Int); - break; - default: - assert( 0 ); - } - - if (i < imm->Immediate.NrTokens - 2) - TXT( ", " ); - } - TXT( " }" ); + dump_imm_data(iter, imm->u, imm->Immediate.NrTokens - 1, + imm->Immediate.DataType); EOL(); @@ -487,12 +560,12 @@ iter_instruction( INSTID( instno ); TXT( ": " ); - + ctx->indent -= info->pre_dedent; for(i = 0; (int)i < ctx->indent; ++i) TXT( " " ); ctx->indent += info->post_indent; - + if (inst->Instruction.Predicate) { CHR( '(' ); @@ -539,21 +612,7 @@ iter_instruction( CHR( ',' ); CHR( ' ' ); - if (dst->Register.Indirect) { - _dump_register_ind( - ctx, - dst->Register.File, - dst->Register.Index, - dst->Indirect.File, - dst->Indirect.Index, - dst->Indirect.SwizzleX ); - } - else { - _dump_register_dst( - ctx, - dst->Register.File, - dst->Register.Index ); - } + _dump_register_dst( ctx, dst ); _dump_writemask( ctx, dst->Register.WriteMask ); first_reg = FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index c15d970b57..5275faa5e2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -699,6 +699,19 @@ tgsi_exec_machine_bind_shader( ++mach->NumOutputs; } } + if 
(parse.FullToken.FullDeclaration.Declaration.File == + TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned reg; + struct tgsi_full_declaration *decl = + &parse.FullToken.FullDeclaration; + debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES); + for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) { + for( i = 0; i < 4; i++ ) { + int idx = reg * 4 + i; + mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float; + } + } + } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -1046,8 +1059,15 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, case TGSI_FILE_INPUT: case TGSI_FILE_SYSTEM_VALUE: for (i = 0; i < QUAD_SIZE; i++) { - /* XXX: 2D indexing */ - chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i]; + /* + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", + index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], + index2D->i[i], index->i[i]); + }*/ + chan->u[i] = mach->Inputs[index2D->i[i] * + TGSI_EXEC_MAX_INPUT_ATTRIBS + + index->i[i]].xyzw[swizzle].u[i]; } break; @@ -1060,6 +1080,16 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; + case TGSI_FILE_TEMPORARY_ARRAY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); + assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS); + + chan->u[i] = + mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i]; + } + break; + case TGSI_FILE_IMMEDIATE: for (i = 0; i < QUAD_SIZE; i++) { assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); @@ -1069,6 +1099,14 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; + case TGSI_FILE_IMMEDIATE_ARRAY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index2D->i[i] == 0); + + chan->f[i] = mach->ImmArray[index->i[i]][swizzle]; + } + break; + case TGSI_FILE_ADDRESS: for (i = 0; i < QUAD_SIZE; i++) { assert(index->i[i] >= 0); @@ -1280,6 
+1318,7 @@ store_dest(struct tgsi_exec_machine *mach, uint i; union tgsi_exec_channel null; union tgsi_exec_channel *dst; + union tgsi_exec_channel index2D; uint execmask = mach->ExecMask; int offset = 0; /* indirection offset */ int index; @@ -1325,6 +1364,77 @@ store_dest(struct tgsi_exec_machine *mach, offset = indir_index.i[0]; } + /* There is an extra source register that is a second + * subscript to a register file. Effectively it means that + * the register file is actually a 2D array of registers. + * + * file[3][1], + * where: + * [3] = Dimension.Index + */ + if (reg->Register.Dimension) { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = reg->Dimension.Index; + + /* Again, the second subscript index can be addressed indirectly + * identically to the first one. + * Nothing stops us from indirectly addressing the indirect register, + * but there is no need for that, so we won't exercise it. + * + * file[ind[4].y+3][1], + * where: + * ind = DimIndirect.File + * [4] = DimIndirect.Index + * .y = DimIndirect.SwizzleX + */ + if (reg->Dimension.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + const uint execmask = mach->ExecMask; + unsigned swizzle; + uint i; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->DimIndirect.Index; + + swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X ); + fetch_src_file_channel(mach, + reg->DimIndirect.File, + swizzle, + &index2, + &ZeroVec, + &indir_index); + + index2D.i[0] += indir_index.i[0]; + index2D.i[1] += indir_index.i[1]; + index2D.i[2] += indir_index.i[2]; + index2D.i[3] += indir_index.i[3]; + + /* for disabled execution channels, zero-out the index to + * avoid using a potential garbage value. 
+ */ + for (i = 0; i < QUAD_SIZE; i++) { + if ((execmask & (1 << i)) == 0) { + index2D.i[i] = 0; + } + } + } + + /* If by any chance there was a need for a 3D array of register + * files, we would have to check whether Dimension is followed + * by a dimension register and continue the saga. + */ + } else { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = 0; + } + switch (reg->Register.File) { case TGSI_FILE_NULL: dst = &null; @@ -1351,6 +1461,16 @@ store_dest(struct tgsi_exec_machine *mach, dst = &mach->Temps[offset + index].xyzw[chan_index]; break; + case TGSI_FILE_TEMPORARY_ARRAY: + index = reg->Register.Index; + assert( index < TGSI_EXEC_NUM_TEMPS ); + assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS ); + /* XXX we use index2D.i[0] here but somehow we might + * end up with someone trying to store indirectly in + * different buffers */ + dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index]; + break; + case TGSI_FILE_ADDRESS: index = reg->Register.Index; dst = &mach->Addrs[index].xyzw[chan_index]; @@ -1536,6 +1656,19 @@ emit_primitive(struct tgsi_exec_machine *mach) } } +static void +conditional_emit_primitive(struct tgsi_exec_machine *mach) +{ + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + int emitted_verts = + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; + if (emitted_verts) { + emit_primitive(mach); + } + } +} + + /* * Fetch four texture samples using STR texture coordinates. 
*/ @@ -3185,6 +3318,9 @@ exec_instruction( break; case TGSI_OPCODE_END: + /* make sure we end primitives which haven't + * been explicitly emitted */ + conditional_emit_primitive(mach); /* halt execution */ *pc = -1; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 3caf820af6..ccf80ca6fd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -93,6 +93,7 @@ struct tgsi_sampler #define TGSI_EXEC_NUM_TEMPS 128 #define TGSI_EXEC_NUM_IMMEDIATES 256 +#define TGSI_EXEC_NUM_TEMP_ARRAYS 8 /* * Locations of various utility registers (_I = Index, _C = Channel) @@ -237,9 +238,12 @@ struct tgsi_exec_machine */ struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; + struct tgsi_exec_vector TempArray[TGSI_EXEC_NUM_TEMP_ARRAYS][TGSI_EXEC_NUM_TEMPS]; float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + float ImmArray[TGSI_EXEC_NUM_IMMEDIATES][4]; + struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS]; struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 7e19e1fe36..db9a342220 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -117,6 +117,17 @@ tgsi_parse_token( next_token( ctx, &decl->Semantic ); } + if (decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned i, j; + decl->ImmediateData.u = (union tgsi_immediate_data*) + &ctx->Tokens[ctx->Position]; + for (i = 0; i <= decl->Range.Last; ++i) { + for (j = 0; j < 4; ++j) { + ctx->Position++; + } + } + } + break; } @@ -181,11 +192,6 @@ tgsi_parse_token( next_token( ctx, &inst->Dst[i].Register ); - /* - * No support for indirect or multi-dimensional addressing. 
- */ - assert( !inst->Dst[i].Register.Dimension ); - if( inst->Dst[i].Register.Indirect ) { next_token( ctx, &inst->Dst[i].Indirect ); @@ -195,6 +201,24 @@ tgsi_parse_token( assert( !inst->Dst[i].Indirect.Dimension ); assert( !inst->Dst[i].Indirect.Indirect ); } + if( inst->Dst[i].Register.Dimension ) { + next_token( ctx, &inst->Dst[i].Dimension ); + + /* + * No support for multi-dimensional addressing. + */ + assert( !inst->Dst[i].Dimension.Dimension ); + + if( inst->Dst[i].Dimension.Indirect ) { + next_token( ctx, &inst->Dst[i].DimIndirect ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->Dst[i].Indirect.Indirect ); + assert( !inst->Dst[i].Indirect.Dimension ); + } + } } assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index b45ccee2f6..36de8807b4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -44,6 +44,8 @@ struct tgsi_full_dst_register { struct tgsi_dst_register Register; struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; }; struct tgsi_full_src_register @@ -54,12 +56,18 @@ struct tgsi_full_src_register struct tgsi_src_register DimIndirect; }; +struct tgsi_immediate_array_data +{ + union tgsi_immediate_data *u; +}; + struct tgsi_full_declaration { struct tgsi_declaration Declaration; struct tgsi_declaration_range Range; struct tgsi_declaration_dimension Dim; struct tgsi_declaration_semantic Semantic; + struct tgsi_immediate_array_data ImmediateData; }; struct tgsi_full_immediate diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index ce0a92f7fb..97148dbe23 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -90,9 +90,18 @@ static void scan_register_dst(scan_register *reg, struct 
tgsi_full_dst_register *dst) { - fill_scan_register1d(reg, - dst->Register.File, - dst->Register.Index); + if (dst->Register.Dimension) { + /*FIXME: right now we don't support indirect + * multidimensional addressing */ + fill_scan_register2d(reg, + dst->Register.File, + dst->Register.Index, + dst->Dimension.Index); + } else { + fill_scan_register1d(reg, + dst->Register.File, + dst->Register.Index); + } } static void @@ -102,7 +111,6 @@ scan_register_src(scan_register *reg, if (src->Register.Dimension) { /*FIXME: right now we don't support indirect * multidimensional addressing */ - debug_assert(!src->Dimension.Indirect); fill_scan_register2d(reg, src->Register.File, src->Register.Index, @@ -236,7 +244,9 @@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "PRED", - "SV" + "SV", + "IMMX", + "TEMPX" }; static boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 527b7d7b22..55fccba4d8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -280,7 +280,9 @@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "PRED", - "SV" + "SV", + "IMMX", + "TEMPX" }; static boolean @@ -345,12 +347,68 @@ parse_opt_writemask( return TRUE; } + +/* <register_file_bracket> ::= <file> `[' + */ static boolean -parse_register_dst( struct translate_ctx *ctx, - uint *file, - int *index ); +parse_register_file_bracket( + struct translate_ctx *ctx, + uint *file ) +{ + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* <register_file_bracket_index> ::= <register_file_bracket> <uint> + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; -struct parsed_src_bracket { + if 
(!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + return TRUE; +} + +/* Parse simple 1d register operand. + * <register_dst> ::= <register_file_bracket_index> `]' + */ +static boolean +parse_register_1d(struct translate_ctx *ctx, + uint *file, + int *index ) +{ + if (!parse_register_file_bracket_index( ctx, file, index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +struct parsed_bracket { int index; uint ind_file; @@ -360,21 +418,21 @@ struct parsed_src_bracket { static boolean -parse_register_src_bracket( +parse_register_bracket( struct translate_ctx *ctx, - struct parsed_src_bracket *brackets) + struct parsed_bracket *brackets) { const char *cur; uint uindex; - memset(brackets, 0, sizeof(struct parsed_src_bracket)); + memset(brackets, 0, sizeof(struct parsed_bracket)); eat_opt_white( &ctx->cur ); cur = ctx->cur; if (parse_file( &cur, &brackets->ind_file )) { - if (!parse_register_dst( ctx, &brackets->ind_file, - &brackets->ind_index )) + if (!parse_register_1d( ctx, &brackets->ind_file, + &brackets->ind_index )) return FALSE; eat_opt_white( &ctx->cur ); @@ -443,7 +501,7 @@ parse_register_src_bracket( static boolean parse_opt_register_src_bracket( struct translate_ctx *ctx, - struct parsed_src_bracket *brackets, + struct parsed_bracket *brackets, int *parsed_brackets) { const char *cur = ctx->cur; @@ -455,7 +513,7 @@ parse_opt_register_src_bracket( ++cur; ctx->cur = cur; - if (!parse_register_src_bracket(ctx, brackets)) + if (!parse_register_bracket(ctx, brackets)) return FALSE; *parsed_brackets = 1; @@ -464,46 +522,6 @@ parse_opt_register_src_bracket( return TRUE; } -/* <register_file_bracket> ::= <file> `[' - */ -static boolean 
-parse_register_file_bracket( - struct translate_ctx *ctx, - uint *file ) -{ - if (!parse_file( &ctx->cur, file )) { - report_error( ctx, "Unknown register file" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '[') { - report_error( ctx, "Expected `['" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -/* <register_file_bracket_index> ::= <register_file_bracket> <uint> - */ -static boolean -parse_register_file_bracket_index( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - uint uindex; - - if (!parse_register_file_bracket( ctx, file )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - return TRUE; -} /* Parse source register operand. * <register_src> ::= <register_file_bracket_index> `]' | @@ -515,13 +533,12 @@ static boolean parse_register_src( struct translate_ctx *ctx, uint *file, - struct parsed_src_bracket *brackets) + struct parsed_bracket *brackets) { - brackets->ind_comp = TGSI_SWIZZLE_X; if (!parse_register_file_bracket( ctx, file )) return FALSE; - if (!parse_register_src_bracket( ctx, brackets )) + if (!parse_register_bracket( ctx, brackets )) return FALSE; return TRUE; @@ -629,23 +646,19 @@ parse_register_dcl( } -/* Parse destination register operand. 
- * <register_dst> ::= <register_file_bracket_index> `]' - */ +/* Parse destination register operand.*/ static boolean parse_register_dst( struct translate_ctx *ctx, uint *file, - int *index ) + struct parsed_bracket *brackets) { - if (!parse_register_file_bracket_index( ctx, file, index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); + brackets->ind_comp = TGSI_SWIZZLE_X; + if (!parse_register_file_bracket( ctx, file )) return FALSE; - } - ctx->cur++; + if (!parse_register_bracket( ctx, brackets )) + return FALSE; + return TRUE; } @@ -655,11 +668,14 @@ parse_dst_operand( struct tgsi_full_dst_register *dst ) { uint file; - int index; uint writemask; const char *cur; + struct parsed_bracket bracket[2]; + int parsed_opt_brackets; - if (!parse_register_dst( ctx, &file, &index )) + if (!parse_register_dst( ctx, &file, &bracket[0] )) + return FALSE; + if (!parse_opt_register_src_bracket(ctx, &bracket[1], &parsed_opt_brackets)) return FALSE; cur = ctx->cur; @@ -669,8 +685,24 @@ parse_dst_operand( return FALSE; dst->Register.File = file; - dst->Register.Index = index; + if (parsed_opt_brackets) { + dst->Register.Dimension = 1; + dst->Dimension.Indirect = 0; + dst->Dimension.Dimension = 0; + dst->Dimension.Index = bracket[0].index; + bracket[0] = bracket[1]; + } + dst->Register.Index = bracket[0].index; dst->Register.WriteMask = writemask; + if (bracket[0].ind_file != TGSI_FILE_NULL) { + dst->Register.Indirect = 1; + dst->Indirect.File = bracket[0].ind_file; + dst->Indirect.Index = bracket[0].ind_index; + dst->Indirect.SwizzleX = bracket[0].ind_comp; + dst->Indirect.SwizzleY = bracket[0].ind_comp; + dst->Indirect.SwizzleZ = bracket[0].ind_comp; + dst->Indirect.SwizzleW = bracket[0].ind_comp; + } return TRUE; } @@ -719,7 +751,7 @@ parse_src_operand( uint file; uint swizzle[4]; boolean parsed_swizzle; - struct parsed_src_bracket bracket[2]; + struct parsed_bracket bracket[2]; int parsed_opt_brackets; if 
(*ctx->cur == '-') { @@ -835,7 +867,7 @@ parse_instruction( inst.Predicate.Negate = 1; } - if (!parse_register_dst( ctx, &file, &index )) + if (!parse_register_1d( ctx, &file, &index )) return FALSE; if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) { @@ -985,6 +1017,45 @@ static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = "PERSPECTIVE" }; + +/* parses a 4-tuple of the form {x, y, z, w} + * where x, y, z, w are numbers */ +static boolean parse_immediate_data(struct translate_ctx *ctx, + float *values) +{ + unsigned i; + + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '{') { + report_error( ctx, "Expected `{'" ); + return FALSE; + } + ctx->cur++; + for (i = 0; i < 4; i++) { + eat_opt_white( &ctx->cur ); + if (i > 0) { + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + if (!parse_float( &ctx->cur, &values[i] )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '}') { + report_error( ctx, "Expected `}'" ); + return FALSE; + } + ctx->cur++; + + return TRUE; +} + static boolean parse_declaration( struct translate_ctx *ctx ) { struct tgsi_full_declaration decl; @@ -995,6 +1066,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) const char *cur; uint advance; boolean is_vs_input; + boolean is_imm_array; assert(Elements(semantic_names) == TGSI_SEMANTIC_COUNT); assert(Elements(interpolate_names) == TGSI_INTERPOLATE_COUNT); @@ -1023,8 +1095,9 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl.Dim.Index2D = brackets[0].first; } - is_vs_input = (file == TGSI_FILE_INPUT && + is_vs_input = (file == TGSI_FILE_INPUT && ctx->processor == TGSI_PROCESSOR_VERTEX); + is_imm_array = (file == TGSI_FILE_IMMEDIATE_ARRAY); cur = ctx->cur; eat_opt_white( &cur ); @@ -1067,6 +1140,44 @@ static boolean parse_declaration( struct translate_ctx *ctx ) break; } } + } else if 
(is_imm_array) { + unsigned i; + float *vals_itr; + /* we have our immediate data */ + if (*cur != '{') { + report_error( ctx, "Immediate array without data" ); + return FALSE; + } + ++cur; + ctx->cur = cur; + + decl.ImmediateData.u = + MALLOC(sizeof(union tgsi_immediate_data) * 4 * + (decl.Range.Last + 1)); + vals_itr = (float*)decl.ImmediateData.u; + for (i = 0; i <= decl.Range.Last; ++i) { + if (!parse_immediate_data(ctx, vals_itr)) { + FREE(decl.ImmediateData.u); + return FALSE; + } + vals_itr += 4; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ',') { + if (i != decl.Range.Last) { + report_error( ctx, "Not enough data in immediate array!" ); + FREE(decl.ImmediateData.u); + return FALSE; + } + } else + ++ctx->cur; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '}') { + FREE(decl.ImmediateData.u); + report_error( ctx, "Immediate array data missing closing '}'" ); + return FALSE; + } + ++ctx->cur; } cur = ctx->cur; @@ -1097,6 +1208,10 @@ static boolean parse_declaration( struct translate_ctx *ctx ) ctx->tokens_cur, ctx->header, (uint) (ctx->tokens_end - ctx->tokens_cur) ); + + if (is_imm_array) + FREE(decl.ImmediateData.u); + if (advance == 0) return FALSE; ctx->tokens_cur += advance; @@ -1107,7 +1222,6 @@ static boolean parse_declaration( struct translate_ctx *ctx ) static boolean parse_immediate( struct translate_ctx *ctx ) { struct tgsi_full_immediate imm; - uint i; float values[4]; uint advance; @@ -1115,37 +1229,13 @@ static boolean parse_immediate( struct translate_ctx *ctx ) report_error( ctx, "Syntax error" ); return FALSE; } - if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) { + if (!str_match_no_case( &ctx->cur, "FLT32" ) || + is_digit_alpha_underscore( ctx->cur )) { report_error( ctx, "Expected `FLT32'" ); return FALSE; } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '{') { - report_error( ctx, "Expected `{'" ); - return FALSE; - } - ctx->cur++; - for (i = 0; i < 4; i++) { - eat_opt_white( &ctx->cur ); - 
if (i > 0) { - if (*ctx->cur != ',') { - report_error( ctx, "Expected `,'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - } - if (!parse_float( &ctx->cur, &values[i] )) { - report_error( ctx, "Expected literal floating point" ); - return FALSE; - } - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '}') { - report_error( ctx, "Expected `}'" ); - return FALSE; - } - ctx->cur++; + + parse_immediate_data(ctx, values); imm = tgsi_default_full_immediate(); imm.Immediate.NrTokens += 4; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 7d357e154b..3cf6893a9b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -732,11 +732,12 @@ ureg_DECL_immediate_int( struct ureg_program *ureg, } -void +void ureg_emit_src( struct ureg_program *ureg, struct ureg_src src ) { - unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 1 : 0); + unsigned size = 1 + (src.Indirect ? 1 : 0) + + (src.Dimension ? (src.DimIndirect ? 
2 : 1) : 0); union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; @@ -769,11 +770,27 @@ ureg_emit_src( struct ureg_program *ureg, } if (src.Dimension) { - out[0].src.Dimension = 1; - out[n].dim.Indirect = 0; - out[n].dim.Dimension = 0; - out[n].dim.Padding = 0; - out[n].dim.Index = src.DimensionIndex; + if (src.DimIndirect) { + out[0].src.Dimension = 1; + out[n].dim.Indirect = 1; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + out[n].dim.Index = src.DimensionIndex; + n++; + out[n].value = 0; + out[n].src.File = src.DimIndFile; + out[n].src.SwizzleX = src.DimIndSwizzle; + out[n].src.SwizzleY = src.DimIndSwizzle; + out[n].src.SwizzleZ = src.DimIndSwizzle; + out[n].src.SwizzleW = src.DimIndSwizzle; + out[n].src.Index = src.DimIndIndex; + } else { + out[0].src.Dimension = 1; + out[n].dim.Indirect = 0; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + out[n].dim.Index = src.DimensionIndex; + } n++; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 055545f3d2..07fb01ab7b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -49,14 +49,18 @@ struct ureg_src unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ unsigned Indirect : 1; /* BOOL */ + unsigned DimIndirect : 1; /* BOOL */ unsigned Dimension : 1; /* BOOL */ unsigned Absolute : 1; /* BOOL */ unsigned Negate : 1; /* BOOL */ int Index : 16; /* SINT */ - unsigned IndirectFile : 4; /* TGSI_FILE_ */ - int IndirectIndex : 16; /* SINT */ - unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ - int DimensionIndex : 16; /* SINT */ + unsigned IndirectFile : 4; /* TGSI_FILE_ */ + int IndirectIndex : 16; /* SINT */ + unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + int DimensionIndex : 16; /* SINT */ + unsigned DimIndFile : 4; /* TGSI_FILE_ */ + int DimIndIndex : 16; /* SINT */ + unsigned DimIndSwizzle : 2; /* TGSI_SWIZZLE_ */ }; /* Very similar to a 
tgsi_dst_register, removing unsupported fields @@ -821,12 +825,28 @@ ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) return reg; } -static INLINE struct ureg_src +static INLINE struct ureg_src ureg_src_dimension( struct ureg_src reg, int index ) { assert(reg.File != TGSI_FILE_NULL); reg.Dimension = 1; + reg.DimIndirect = 0; + reg.DimensionIndex = index; + return reg; +} + + +static INLINE struct ureg_src +ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr, + int index ) +{ + assert(reg.File != TGSI_FILE_NULL); + reg.Dimension = 1; + reg.DimIndirect = 1; reg.DimensionIndex = index; + reg.DimIndFile = addr.File; + reg.DimIndIndex = addr.Index; + reg.DimIndSwizzle = addr.SwizzleX; return reg; } @@ -874,6 +894,10 @@ ureg_src_register(unsigned file, src.Negate = 0; src.Dimension = 0; src.DimensionIndex = 0; + src.DimIndirect = 0; + src.DimIndFile = TGSI_FILE_NULL; + src.DimIndIndex = 0; + src.DimIndSwizzle = 0; return src; } @@ -897,6 +921,10 @@ ureg_src( struct ureg_dst dst ) src.Negate = 0; src.Dimension = 0; src.DimensionIndex = 0; + src.DimIndirect = 0; + src.DimIndFile = TGSI_FILE_NULL; + src.DimIndIndex = 0; + src.DimIndSwizzle = 0; return src; } @@ -944,7 +972,11 @@ ureg_src_undef( void ) src.Negate = 0; src.Dimension = 0; src.DimensionIndex = 0; - + src.DimIndirect = 0; + src.DimIndFile = TGSI_FILE_NULL; + src.DimIndIndex = 0; + src.DimIndSwizzle = 0; + return src; } diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index f8dbd2b36a..0e43a512ee 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -378,25 +378,28 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, char *dst = (vert + tg->attrib[attr].output_offset); - if (tg->attrib[attr].instance_divisor) { - index = instance_id / tg->attrib[attr].instance_divisor; - } else { - index = elt; - } + if 
(tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + if (tg->attrib[attr].instance_divisor) { + index = instance_id / tg->attrib[attr].instance_divisor; + } else { + index = elt; + } - index = MIN2(index, tg->attrib[attr].max_index); + index = MIN2(index, tg->attrib[attr].max_index); - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * index; + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * index; - tg->attrib[attr].fetch( data, src, 0, 0 ); + tg->attrib[attr].fetch( data, src, 0, 0 ); + } else { + data[0] = (float)instance_id; + } if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", i, elt, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } - vert += tg->translate.key.output_stride; } } diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 954f5706ef..5e373ff24c 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -195,7 +195,7 @@ debug_get_flags_option(const char *name, namealign = MAX2(namealign, strlen(flags->name)); for (flags = orig; flags->name; ++flags) debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name, - sizeof(unsigned long)*CHAR_BIT/4, flags->value, + (int)sizeof(unsigned long)*CHAR_BIT/4, flags->value, flags->desc ? " " : "", flags->desc ? flags->desc : ""); } else { diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 6370e77986..fe19466436 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -567,12 +567,26 @@ util_bswap16(uint16_t n) #define MAX3( A, B, C ) MAX2( MAX2( A, B ), C ) +/** + * Align a value, only works for pot alignments. + */ static INLINE int align(int value, int alignment) { return (value + alignment - 1) & ~(alignment - 1); } +/** + * Works like align but on npot alignments. 
+ */ +static INLINE size_t +util_align_npot(size_t value, size_t alignment) +{ + if (value % alignment) + return value + (alignment - (value % alignment)); + return value; +} + static INLINE unsigned u_minify(unsigned value, unsigned levels) { diff --git a/src/gallium/auxiliary/util/u_pointer.h b/src/gallium/auxiliary/util/u_pointer.h index ae6f43bff8..cce0c7430e 100644 --- a/src/gallium/auxiliary/util/u_pointer.h +++ b/src/gallium/auxiliary/util/u_pointer.h @@ -111,6 +111,17 @@ pointer_to_func( void *p ) return pf.f; } +static INLINE void * +func_to_pointer( func_pointer f ) +{ + union { + void *p; + func_pointer f; + } pf; + pf.f = f; + return pf.p; +} + #ifdef __cplusplus } diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index 75d44432d9..af229e61a0 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -59,6 +59,8 @@ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, unsigned usage ) { struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr ); + if (!upload) + return NULL; upload->pipe = pipe; upload->default_size = default_size; |