diff options
Diffstat (limited to 'src/gallium/auxiliary/draw')
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt.c | 113 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_aos.c | 17 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_exec.c | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_ppc.c | 53 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_sse.c | 16 |
5 files changed, 159 insertions, 46 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 87ec6ae20c..3c175f31d8 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -33,6 +33,8 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" +#include "draw/draw_vs.h" +#include "tgsi/tgsi_dump.h" static unsigned trim( unsigned count, unsigned first, unsigned incr ) { @@ -176,6 +178,92 @@ void draw_pt_destroy( struct draw_context *draw ) } +/** + * Debug- print the first 'count' vertices. + */ +static void +draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) +{ + uint i; + + debug_printf("Draw arrays(prim = %u, start = %u, count = %u)\n", + prim, start, count); + + for (i = 0; i < count; i++) { + uint ii, j; + + if (draw->pt.user.elts) { + /* indexed arrays */ + switch (draw->pt.user.eltSize) { + case 1: + { + const ubyte *elem = (const ubyte *) draw->pt.user.elts; + ii = elem[start + i]; + } + break; + case 2: + { + const ushort *elem = (const ushort *) draw->pt.user.elts; + ii = elem[start + i]; + } + break; + case 4: + { + const uint *elem = (const uint *) draw->pt.user.elts; + ii = elem[start + i]; + } + break; + default: + assert(0); + } + debug_printf("Element[%u + %u] -> Vertex %u:\n", start, i, ii); + } + else { + /* non-indexed arrays */ + ii = start + i; + debug_printf("Vertex %u:\n", ii); + } + + for (j = 0; j < draw->pt.nr_vertex_elements; j++) { + uint buf = draw->pt.vertex_element[j].vertex_buffer_index; + ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf]; + ptr += draw->pt.vertex_buffer[buf].pitch * ii; + ptr += draw->pt.vertex_element[j].src_offset; + + debug_printf(" Attr %u: ", j); + switch (draw->pt.vertex_element[j].src_format) { + case PIPE_FORMAT_R32_FLOAT: + { + float *v = (float *) ptr; + debug_printf("%f @ %p\n", v[0], (void *) v); + } + break; + case PIPE_FORMAT_R32G32_FLOAT: + { + float *v = (float *) ptr; + debug_printf("%f %f @ %p\n", v[0], v[1], (void *) v); + } + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + float *v = (float *) ptr; + debug_printf("%f %f %f @ %p\n", v[0], v[1], v[2], (void *) v); + } + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + float *v = (float *) ptr; + debug_printf("%f %f %f %f @ %p\n", v[0], v[1], v[2], v[3], + (void *) v); + } + break; + default: + debug_printf("other format (fix me)\n"); + ; + } + } + } +} /** @@ -195,6 +283,31 @@ draw_arrays(struct draw_context *draw, unsigned prim, draw->reduced_prim = reduced_prim; } + if (0) + draw_print_arrays(draw, prim, start, MIN2(count, 20)); + +#if 0 + { + int i; + debug_printf("draw_arrays(prim=%u start=%u count=%u):\n", + prim, start, count); + tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); + debug_printf("Elements:\n"); + for (i = 0; i < draw->pt.nr_vertex_elements; i++) { + debug_printf(" format=%s comps=%u\n", + pf_name(draw->pt.vertex_element[i].src_format), + draw->pt.vertex_element[i].nr_components); + } + debug_printf("Buffers:\n"); + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + debug_printf(" pitch=%u offset=%u ptr=%p\n", + draw->pt.vertex_buffer[i].pitch, + draw->pt.vertex_buffer[i].buffer_offset, + draw->pt.user.vbuffer[i]); + } + } +#endif + /* drawing done here: */ draw_pt_arrays(draw, prim, start, count); } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 87232865e2..6141ba9cbf 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1632,6 +1632,17 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } +static boolean emit_TRUNC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg tmp0 = aos_get_xmm_reg(cp); + + sse2_cvttps2dq(cp->func, tmp0, arg0); + sse2_cvtdq2ps(cp->func, tmp0, tmp0); + + store_dest(cp, &op->FullDstRegisters[0], tmp0); + return TRUE; +} static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { @@ -1770,6 +1781,9 @@ emit_instruction( struct aos_compilation *cp, case TGSI_OPCODE_SIN: return emit_SIN(cp, inst); + case TGSI_OPCODE_TRUNC: + return emit_TRUNC(cp, inst); + case TGSI_OPCODE_END: return TRUE; @@ -2176,7 +2190,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!vaos->buffer) goto fail; - debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers); + if (0) + debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers); #if 0 tgsi_dump(vs->state.tokens, 0); diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 13d4fcfdbf..80c3606657 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -123,6 +123,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, input = (const float (*)[4])((const char *)input + input_stride); } + tgsi_set_exec_mask(machine, + 1, + max_vertices > 1, + max_vertices > 2, + max_vertices > 3); + /* run interpreter */ tgsi_exec_machine_run( machine ); diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 8eff6d4fda..8b75136144 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -54,31 +54,16 @@ typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], float (*outputs)[4][4], float (*temps)[4][4], - float (*immeds)[4][4], + float (*immeds)[4], float (*consts)[4], const float *builtins); -#if 0 - const struct tgsi_exec_vector *input, - struct tgsi_exec_vector *output, - float (*constant)[4], /* 3 */ - struct tgsi_exec_vector *temporary, /* 4 */ - float (*immediates)[4], /* 5 */ - const float (*aos_input)[4], /* 6 */ - uint num_inputs, /* 7 */ - uint input_stride, /* 8 */ - float (*aos_output)[4], /* 9 */ - uint num_outputs, /* 10 */ - uint output_stride ); /* 11 */ -#endif struct draw_ppc_vertex_shader { struct draw_vertex_shader base; struct ppc_function ppc_program; codegen_function func; - - struct tgsi_exec_machine *machine; }; @@ -86,11 +71,12 @@ static void vs_ppc_prepare( struct draw_vertex_shader *base, struct draw_context *draw ) { + /* nothing */ } - -/* Simplified vertex shader interface for the pt paths. Given the +/** + * Simplified vertex shader interface for the pt paths. Given the * complexity of code-generating all the above operations together, * it's time to try doing all the other stuff separately. */ @@ -104,7 +90,6 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, unsigned output_stride ) { struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; - struct tgsi_exec_machine *machine = shader->machine; unsigned int i; #define MAX_VERTICES 4 @@ -137,27 +122,11 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* run compiled shader */ -#if 0 - shader->func(machine->Inputs, - machine->Outputs, - (float (*)[4])constants, - machine->Temps, - (float (*)[4])shader->base.immediates, - input, - base->info.num_inputs, - input_stride, - output, - base->info.num_outputs, - output_stride ); -#else shader->func(inputs_soa, outputs_soa, temps_soa, - (float (*)[4][4]) shader->base.immediates, + (float (*)[4]) shader->base.immediates, (float (*)[4]) constants, ppc_builtin_constants); - /*output[0][0] = input[0][0] * 0.5;*/ -#endif - /* convert (up to) four output verts from SoA back to AoS format */ for (attr = 0; attr < base->info.num_outputs; attr++) { float *vOut = (float *) output; @@ -183,8 +152,6 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, } - - static void vs_ppc_delete( struct draw_vertex_shader *base ) { @@ -201,7 +168,7 @@ vs_ppc_delete( struct draw_vertex_shader *base ) struct draw_vertex_shader * draw_create_vs_ppc(struct draw_context *draw, - const struct pipe_shader_state *templ) + const struct pipe_shader_state *templ) { struct draw_ppc_vertex_shader *vs; @@ -227,16 +194,14 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.run_linear = vs_ppc_run_linear; vs->base.delete = vs_ppc_delete; - vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 * + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * sizeof(float), 16); - vs->machine = &draw->vs.machine; - - ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */ + ppc_init_func( &vs->ppc_program ); if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, - (float (*)[4])vs->base.immediates, + (float (*)[4]) vs->base.immediates, TRUE )) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index b11ae31662..77ba5152f9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -37,7 +37,7 @@ #include "draw_vs.h" -#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) +#if defined(PIPE_ARCH_X86) #include "pipe/p_shader_tokens.h" @@ -99,9 +99,23 @@ vs_sse_run_linear( struct draw_vertex_shader *base, struct tgsi_exec_machine *machine = shader->machine; unsigned int i; + /* By default, execute all channels. XXX move this inside the loop + * below when we support shader conditionals/loops. + */ + tgsi_set_exec_mask(machine, 1, 1, 1, 1); + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + if (max_vertices < 4) { + /* disable the unused execution channels */ + tgsi_set_exec_mask(machine, + 1, + max_vertices > 1, + max_vertices > 2, + 0); + } + /* run compiled shader */ shader->func(machine->Inputs, |