summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/draw
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/draw')
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c113
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c17
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c53
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c16
5 files changed, 159 insertions, 46 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 87ec6ae20c..3c175f31d8 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -33,6 +33,8 @@
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+#include "tgsi/tgsi_dump.h"
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
@@ -176,6 +178,92 @@ void draw_pt_destroy( struct draw_context *draw )
}
+/**
+ * Debug- print the first 'count' vertices.
+ */
+static void
+draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
+{
+ uint i;
+
+ debug_printf("Draw arrays(prim = %u, start = %u, count = %u)\n",
+ prim, start, count);
+
+ for (i = 0; i < count; i++) {
+ uint ii, j;
+
+ if (draw->pt.user.elts) {
+ /* indexed arrays */
+ switch (draw->pt.user.eltSize) {
+ case 1:
+ {
+ const ubyte *elem = (const ubyte *) draw->pt.user.elts;
+ ii = elem[start + i];
+ }
+ break;
+ case 2:
+ {
+ const ushort *elem = (const ushort *) draw->pt.user.elts;
+ ii = elem[start + i];
+ }
+ break;
+ case 4:
+ {
+ const uint *elem = (const uint *) draw->pt.user.elts;
+ ii = elem[start + i];
+ }
+ break;
+ default:
+ assert(0);
+ }
+ debug_printf("Element[%u + %u] -> Vertex %u:\n", start, i, ii);
+ }
+ else {
+ /* non-indexed arrays */
+ ii = start + i;
+ debug_printf("Vertex %u:\n", ii);
+ }
+
+ for (j = 0; j < draw->pt.nr_vertex_elements; j++) {
+ uint buf = draw->pt.vertex_element[j].vertex_buffer_index;
+ ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf];
+ ptr += draw->pt.vertex_buffer[buf].pitch * ii;
+ ptr += draw->pt.vertex_element[j].src_offset;
+
+ debug_printf(" Attr %u: ", j);
+ switch (draw->pt.vertex_element[j].src_format) {
+ case PIPE_FORMAT_R32_FLOAT:
+ {
+ float *v = (float *) ptr;
+ debug_printf("%f @ %p\n", v[0], (void *) v);
+ }
+ break;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ {
+ float *v = (float *) ptr;
+ debug_printf("%f %f @ %p\n", v[0], v[1], (void *) v);
+ }
+ break;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ {
+ float *v = (float *) ptr;
+ debug_printf("%f %f %f @ %p\n", v[0], v[1], v[2], (void *) v);
+ }
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ {
+ float *v = (float *) ptr;
+ debug_printf("%f %f %f %f @ %p\n", v[0], v[1], v[2], v[3],
+ (void *) v);
+ }
+ break;
+ default:
+ debug_printf("other format (fix me)\n");
+ ;
+ }
+ }
+ }
+}
/**
@@ -195,6 +283,31 @@ draw_arrays(struct draw_context *draw, unsigned prim,
draw->reduced_prim = reduced_prim;
}
+ if (0)
+ draw_print_arrays(draw, prim, start, MIN2(count, 20));
+
+#if 0
+ {
+ int i;
+ debug_printf("draw_arrays(prim=%u start=%u count=%u):\n",
+ prim, start, count);
+ tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
+ debug_printf("Elements:\n");
+ for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+ debug_printf(" format=%s comps=%u\n",
+ pf_name(draw->pt.vertex_element[i].src_format),
+ draw->pt.vertex_element[i].nr_components);
+ }
+ debug_printf("Buffers:\n");
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ debug_printf(" pitch=%u offset=%u ptr=%p\n",
+ draw->pt.vertex_buffer[i].pitch,
+ draw->pt.vertex_buffer[i].buffer_offset,
+ draw->pt.user.vbuffer[i]);
+ }
+ }
+#endif
+
/* drawing done here: */
draw_pt_arrays(draw, prim, start, count);
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 87232865e2..6141ba9cbf 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1632,6 +1632,17 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
+static boolean emit_TRUNC( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg tmp0 = aos_get_xmm_reg(cp);
+
+ sse2_cvttps2dq(cp->func, tmp0, arg0);
+ sse2_cvtdq2ps(cp->func, tmp0, tmp0);
+
+ store_dest(cp, &op->FullDstRegisters[0], tmp0);
+ return TRUE;
+}
static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
@@ -1770,6 +1781,9 @@ emit_instruction( struct aos_compilation *cp,
case TGSI_OPCODE_SIN:
return emit_SIN(cp, inst);
+ case TGSI_OPCODE_TRUNC:
+ return emit_TRUNC(cp, inst);
+
case TGSI_OPCODE_END:
return TRUE;
@@ -2176,7 +2190,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
if (!vaos->buffer)
goto fail;
- debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers);
+ if (0)
+ debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers);
#if 0
tgsi_dump(vs->state.tokens, 0);
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 13d4fcfdbf..80c3606657 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -123,6 +123,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
input = (const float (*)[4])((const char *)input + input_stride);
}
+ tgsi_set_exec_mask(machine,
+ 1,
+ max_vertices > 1,
+ max_vertices > 2,
+ max_vertices > 3);
+
/* run interpreter */
tgsi_exec_machine_run( machine );
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index 8eff6d4fda..8b75136144 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -54,31 +54,16 @@
typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4],
float (*outputs)[4][4],
float (*temps)[4][4],
- float (*immeds)[4][4],
+ float (*immeds)[4],
float (*consts)[4],
const float *builtins);
-#if 0
- const struct tgsi_exec_vector *input,
- struct tgsi_exec_vector *output,
- float (*constant)[4], /* 3 */
- struct tgsi_exec_vector *temporary, /* 4 */
- float (*immediates)[4], /* 5 */
- const float (*aos_input)[4], /* 6 */
- uint num_inputs, /* 7 */
- uint input_stride, /* 8 */
- float (*aos_output)[4], /* 9 */
- uint num_outputs, /* 10 */
- uint output_stride ); /* 11 */
-#endif
struct draw_ppc_vertex_shader {
struct draw_vertex_shader base;
struct ppc_function ppc_program;
codegen_function func;
-
- struct tgsi_exec_machine *machine;
};
@@ -86,11 +71,12 @@ static void
vs_ppc_prepare( struct draw_vertex_shader *base,
struct draw_context *draw )
{
+ /* nothing */
}
-
-/* Simplified vertex shader interface for the pt paths. Given the
+/**
+ * Simplified vertex shader interface for the pt paths. Given the
* complexity of code-generating all the above operations together,
* it's time to try doing all the other stuff separately.
*/
@@ -104,7 +90,6 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
unsigned output_stride )
{
struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base;
- struct tgsi_exec_machine *machine = shader->machine;
unsigned int i;
#define MAX_VERTICES 4
@@ -137,27 +122,11 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
/* run compiled shader
*/
-#if 0
- shader->func(machine->Inputs,
- machine->Outputs,
- (float (*)[4])constants,
- machine->Temps,
- (float (*)[4])shader->base.immediates,
- input,
- base->info.num_inputs,
- input_stride,
- output,
- base->info.num_outputs,
- output_stride );
-#else
shader->func(inputs_soa, outputs_soa, temps_soa,
- (float (*)[4][4]) shader->base.immediates,
+ (float (*)[4]) shader->base.immediates,
(float (*)[4]) constants,
ppc_builtin_constants);
- /*output[0][0] = input[0][0] * 0.5;*/
-#endif
-
/* convert (up to) four output verts from SoA back to AoS format */
for (attr = 0; attr < base->info.num_outputs; attr++) {
float *vOut = (float *) output;
@@ -183,8 +152,6 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
}
-
-
static void
vs_ppc_delete( struct draw_vertex_shader *base )
{
@@ -201,7 +168,7 @@ vs_ppc_delete( struct draw_vertex_shader *base )
struct draw_vertex_shader *
draw_create_vs_ppc(struct draw_context *draw,
- const struct pipe_shader_state *templ)
+ const struct pipe_shader_state *templ)
{
struct draw_ppc_vertex_shader *vs;
@@ -227,16 +194,14 @@ draw_create_vs_ppc(struct draw_context *draw,
vs->base.run_linear = vs_ppc_run_linear;
vs->base.delete = vs_ppc_delete;
- vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 *
+ vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
sizeof(float), 16);
- vs->machine = &draw->vs.machine;
-
- ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */
+ ppc_init_func( &vs->ppc_program );
if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens,
&vs->ppc_program,
- (float (*)[4])vs->base.immediates,
+ (float (*)[4]) vs->base.immediates,
TRUE ))
goto fail;
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index b11ae31662..77ba5152f9 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -37,7 +37,7 @@
#include "draw_vs.h"
-#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE)
+#if defined(PIPE_ARCH_X86)
#include "pipe/p_shader_tokens.h"
@@ -99,9 +99,23 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
struct tgsi_exec_machine *machine = shader->machine;
unsigned int i;
+ /* By default, execute all channels. XXX move this inside the loop
+ * below when we support shader conditionals/loops.
+ */
+ tgsi_set_exec_mask(machine, 1, 1, 1, 1);
+
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+ if (max_vertices < 4) {
+ /* disable the unused execution channels */
+ tgsi_set_exec_mask(machine,
+ 1,
+ max_vertices > 1,
+ max_vertices > 2,
+ 0);
+ }
+
/* run compiled shader
*/
shader->func(machine->Inputs,