From fe586f8612dd517b9a1f0d87fbaf3a75e3caf588 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 6 May 2008 18:59:45 -0400 Subject: redo the linear paths --- src/gallium/auxiliary/draw/draw_private.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/auxiliary/draw/draw_private.h') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index cee58bbf73..e036d498b8 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -247,6 +247,12 @@ void draw_pipeline_run( struct draw_context *draw, const ushort *elts, unsigned count ); +void draw_pipeline_run_linear( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned count, + unsigned stride ); + void draw_pipeline_flush( struct draw_context *draw, -- cgit v1.2.3 From b5e5369da5fc50d63a6ece931fac44b555eb0314 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 12 May 2008 15:20:38 +0100 Subject: draw: add fetch-shade-emit path Enable with TEST_FSE=t. Performs fetch from API-provided vertex buffers, transformation with one of three (two working) hard-coded shaders, and final emit to hardware vertices all in a single pass. Currently only really useful for profiling in conjunction with SP_NO_RAST=t. 
--- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_private.h | 3 + src/gallium/auxiliary/draw/draw_pt.c | 25 +- src/gallium/auxiliary/draw/draw_pt.h | 3 + .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 714 +++++++++++++++++++++ 5 files changed, 742 insertions(+), 4 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c (limited to 'src/gallium/auxiliary/draw/draw_private.h') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index da7eded21f..68e7744cc5 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -26,6 +26,7 @@ C_SOURCES = \ draw_pt_emit.c \ draw_pt_fetch.c \ draw_pt_fetch_emit.c \ + draw_pt_fetch_shade_emit.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_post_vs.c \ draw_pt_varray.c \ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index e036d498b8..cbe64cd290 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -124,12 +124,14 @@ struct draw_context struct { struct { struct draw_pt_middle_end *fetch_emit; + /*struct draw_pt_middle_end *fetch_shade_emit;*/ struct draw_pt_middle_end *general; } middle; struct { struct draw_pt_front_end *vcache; struct draw_pt_front_end *varray; + struct draw_pt_front_end *fetch_shade_emit; /* temp hack */ } front; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -154,6 +156,7 @@ struct draw_context const void *constants; } user; + boolean test_fse; } pt; struct { diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index bccde6c5fd..448deef98c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -64,7 +64,7 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_PIPELINE; } - if (!draw->bypass_clipping) { + if (!draw->bypass_clipping && !draw->pt.test_fse) { opt |= PT_CLIPTEST; } @@ -72,16 +72,20 @@ 
draw_pt_arrays(struct draw_context *draw, opt |= PT_SHADE; } - if (opt) - middle = draw->pt.middle.general; - else + + if (opt == 0) middle = draw->pt.middle.fetch_emit; + else + middle = draw->pt.middle.general; /* Pick the right frontend */ if (draw->pt.user.elts) { frontend = draw->pt.front.vcache; + } else if (opt == PT_SHADE && draw->pt.test_fse) { + /* should be a middle end.. */ + frontend = draw->pt.front.fetch_shade_emit; } else { frontend = draw->pt.front.varray; } @@ -113,6 +117,14 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_emit) return FALSE; + draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; + if (draw->pt.test_fse) { + draw->pt.front.fetch_shade_emit = draw_pt_fetch_shade_emit( draw ); + if (!draw->pt.front.fetch_shade_emit) + return FALSE; + } + + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); if (!draw->pt.middle.general) return FALSE; @@ -133,6 +145,11 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.middle.fetch_emit = NULL; } + if (draw->pt.front.fetch_shade_emit) { + draw->pt.front.fetch_shade_emit->destroy( draw->pt.front.fetch_shade_emit ); + draw->pt.front.fetch_shade_emit = NULL; + } + if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); draw->pt.front.vcache = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 312fdbe4f4..bcd89f6bd6 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -121,6 +121,8 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); +struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw ); + /* Middle-ends: * * Currently one general-purpose case which can do all possibilities, @@ -132,6 +134,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); * 
vertex_elements. */ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); +//struct draw_pt_middle_end *draw_pt_fetch_shade_emit( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c new file mode 100644 index 0000000000..9e1d1add36 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -0,0 +1,714 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" + +#include "translate/translate.h" + +struct fetch_shade_emit; + +struct fse_shader { + struct translate_key key; + + void (*run_linear)( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ); +}; + +/* Prototype fetch, shade, emit-hw-verts all in one go. + */ +struct fetch_shade_emit { + struct draw_pt_front_end base; + + struct draw_context *draw; + + struct translate_key key; + + /* Temporaries: + */ + const float *constants; + unsigned pitch[PIPE_MAX_ATTRIBS]; + const ubyte *src[PIPE_MAX_ATTRIBS]; + unsigned prim; + + /* Points to one of the three hardwired example shaders, below: + */ + struct fse_shader *active; + + /* Temporary: A list of hard-wired shaders. Of course the plan + * would be to generate these for a given (vertex-shader, + * translate-key) pair... + */ + struct fse_shader shader[10]; + int nr_shaders; +}; + + + +/* Not quite passthrough yet -- we're still running the 'shader' here, + * inlined into the vertex fetch function. 
+ */ +static void fetch_xyz_rgb_st( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + + const float *m = fse->constants; + const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; + const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; + const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; + const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; + + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const ubyte *st = fse->src[2] + start * fse->pitch[2]; + + float *out = (float *)buffer; + + + assert(fse->pitch[1] == 0); + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m0 * ix + m4 * iy + m8 * iz + m12; + out[1] = m1 * ix + m5 * iy + m9 * iz + m13; + out[2] = m2 * ix + m6 * iy + m10 * iz + m14; + out[3] = m3 * ix + m7 * iy + m11 * iz + m15; + xyz += fse->pitch[0]; + } + + { + out[4] = 1.0f; + out[5] = 1.0f; + out[6] = 1.0f; + out[7] = 1.0f; + } + + { + const float *in = (const float *)st; st += fse->pitch[2]; + out[8] = in[0]; + out[9] = in[1]; + out[10] = 0.0f; + out[11] = 1.0f; + } + + out += 12; + } +} + + + +static void fetch_xyz_rgb( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + + const float *m = (const float *)fse->constants; + const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; + const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; + const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; + const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; + + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; + + float *out = (float *)buffer; + +// debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]); + + + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float 
ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m0 * ix + m4 * iy + m8 * iz + m12; + out[1] = m1 * ix + m5 * iy + m9 * iz + m13; + out[2] = m2 * ix + m6 * iy + m10 * iz + m14; + out[3] = m3 * ix + m7 * iy + m11 * iz + m15; + xyz += fse->pitch[0]; + } + + { + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + rgb += fse->pitch[1]; + } + + out += 8; + } +} + + + + +static void fetch_xyz_rgb_psiz( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + + const float *m = (const float *)fse->constants; + const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; + const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; + const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; + const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; + + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const float *rgb = (const float *)(fse->src[1] + start * fse->pitch[1]); + const float psiz = 1.0; + + float *out = (float *)buffer; + + + assert(fse->pitch[1] == 0); + + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m0 * ix + m4 * iy + m8 * iz + m12; + out[1] = m1 * ix + m5 * iy + m9 * iz + m13; + out[2] = m2 * ix + m6 * iy + m10 * iz + m14; + out[3] = m3 * ix + m7 * iy + m11 * iz + m15; + xyz += fse->pitch[0]; + } + + { + out[4] = rgb[0]; + out[5] = rgb[1]; + out[6] = rgb[2]; + out[7] = 1.0f; + } + + { + out[8] = psiz; + } + + out += 9; + } +} + + + + +static boolean set_prim( struct fetch_shade_emit *fse, + unsigned prim, + unsigned count ) +{ + struct draw_context *draw = fse->draw; + + fse->prim = prim; + + switch (prim) { + case PIPE_PRIM_LINE_LOOP: + if (count > 1024) + return FALSE; + draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP ); + break; + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + if (count > 1024) + return FALSE; + 
draw->render->set_primitive( draw->render, prim ); + break; + + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES ); + break; + + default: + draw->render->set_primitive( draw->render, prim ); + break; + } + + return TRUE; +} + + + + + + +static void fse_prepare( struct draw_pt_front_end *fe, + unsigned prim, + struct draw_pt_middle_end *unused, + unsigned opt ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe; + struct draw_context *draw = fse->draw; + unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs; + unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs; + const struct vertex_info *vinfo; + unsigned i; + boolean need_psize = 0; + + + if (draw->pt.user.elts) { + assert(0); + return ; + } + + if (!set_prim(fse, prim, /*count*/1022 )) { + assert(0); + return ; + } + + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + + + fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ + num_vs_inputs); /* inputs - fetch from api format */ + + fse->key.output_stride = vinfo->size * 4; + memset(fse->key.element, 0, + fse->key.nr_elements * sizeof(fse->key.element[0])); + + for (i = 0; i < num_vs_inputs; i++) { + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].input_format = src->src_format; + + /* Consider ignoring these at this point, ie make generated + * programs independent of this state: + */ + fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index; + fse->key.element[i].input_offset = 0; //src->src_offset; + } + + + { + unsigned dst_offset = 0; + + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned output_format = PIPE_FORMAT_NONE; + unsigned vs_output = vinfo->src_index[i]; + + switch (vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + 
break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + need_psize = 1; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + vs_output = num_vs_outputs + 1; + + break; + default: + assert(0); + break; + } + + /* The elements in the key correspond to vertex shader output + * numbers, not to positions in the hw vertex description -- + * that's handled by the output_offset field. + */ + fse->key.element[vs_output].output_format = output_format; + fse->key.element[vs_output].output_offset = dst_offset; + + dst_offset += emit_sz; + assert(fse->key.output_stride >= dst_offset); + } + } + + /* To make psize work, really need to tell the vertex shader to + * copy that value from input->output. For 'translate' this was + * implicit for all elements. + */ +#if 0 + if (need_psize) { + unsigned input = num_vs_inputs + 1; + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; + fse->key.element[i].input_buffer = 0; //nr_buffers + 1; + fse->key.element[i].input_offset = 0; + + fse->key.nr_elements += 1; + + } +#endif + + fse->constants = draw->pt.user.constants; + + /* Would normally look up a vertex shader and peruse its list of + * varients somehow. We omitted that step and put all the + * hardcoded "shaders" into an array. We're just making the + * assumption that this happens to be a matching shader... ie + * you're running isosurf, aren't you? 
+ */ + fse->active = NULL; + for (i = 0; i < fse->nr_shaders; i++) { + if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0) + fse->active = &fse->shader[i]; + } + + if (!fse->active) { + assert(0); + return ; + } + + /* Now set buffer pointers: + */ + for (i = 0; i < num_vs_inputs; i++) { + unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; + + fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] + + draw->pt.vertex_buffer[buf].buffer_offset + + draw->pt.vertex_element[i].src_offset); + + fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch; + + } + + + //return TRUE; +} + + +static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + *first = 1; + *incr = 1; + return TRUE; + case PIPE_PRIM_LINES: + *first = 2; + *incr = 2; + return TRUE; + case PIPE_PRIM_LINE_STRIP: + *first = 2; + *incr = 1; + return TRUE; + case PIPE_PRIM_TRIANGLES: + *first = 3; + *incr = 3; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + *first = 3; + *incr = 1; + return TRUE; + case PIPE_PRIM_QUADS: + *first = 4; + *incr = 4; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + *first = 4; + *incr = 2; + return TRUE; + default: + *first = 0; + *incr = 1; /* set to one so that count % incr works */ + return FALSE; + } +} + + + + +#define INDEX(i) (start + (i)) +static void fse_render_linear( struct vbuf_render *render, + unsigned prim, + unsigned start, + unsigned length ) +{ + ushort *tmp = NULL; + unsigned i, j; + + switch (prim) { + case PIPE_PRIM_LINE_LOOP: + tmp = MALLOC( sizeof(ushort) * (length + 1) ); + + for (i = 0; i < length; i++) + tmp[i] = INDEX(i); + tmp[length] = 0; + + render->draw( render, + tmp, + length+1 ); + break; + + + case PIPE_PRIM_QUAD_STRIP: + tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) ); + + for (j = i = 0; i + 3 < length; i += 2, j += 6) { + tmp[j+0] = INDEX(i+0); + tmp[j+1] = INDEX(i+1); + tmp[j+2] = INDEX(i+3); + + tmp[j+3] = INDEX(i+2); + tmp[j+4] = INDEX(i+0); + 
tmp[j+5] = INDEX(i+3); + } + + if (j) + render->draw( render, tmp, j ); + break; + + case PIPE_PRIM_QUADS: + tmp = MALLOC( sizeof(int) * (length / 4 * 6) ); + + for (j = i = 0; i + 3 < length; i += 4, j += 6) { + tmp[j+0] = INDEX(i+0); + tmp[j+1] = INDEX(i+1); + tmp[j+2] = INDEX(i+3); + + tmp[j+3] = INDEX(i+1); + tmp[j+4] = INDEX(i+2); + tmp[j+5] = INDEX(i+3); + } + + if (j) + render->draw( render, tmp, j ); + break; + + default: + render->draw_arrays( render, + start, + length ); + break; + } + + if (tmp) + FREE(tmp); +} + + + +static boolean do_draw( struct fetch_shade_emit *fse, + unsigned start, unsigned count ) +{ + struct draw_context *draw = fse->draw; + + char *hw_verts = + draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)count ); + + if (!hw_verts) + return FALSE; + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping and viewport transformation are done on hardware. + */ + fse->active->run_linear( fse, + start, count, + hw_verts ); + + /* Draw arrays path to avoid re-emitting index list again and + * again. 
+ */ + fse_render_linear( draw->render, + fse->prim, + 0, + count ); + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + count ); + + return TRUE; +} + + +static void +fse_run(struct draw_pt_front_end *fe, + pt_elt_func elt_func, + const void *elt_ptr, + unsigned count) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe; + unsigned i = 0; + unsigned first, incr; + unsigned start = elt_func(elt_ptr, 0); + + //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count); + + split_prim_inplace(fse->prim, &first, &incr); + + count -= (count - first) % incr; + + while (i + first <= count) { + int nr = MIN2( count - i, 1024 ); + + /* snap to prim boundary + */ + nr -= (nr - first) % incr; + + if (!do_draw( fse, start + i, nr )) { + assert(0); + return ; + } + + /* increment allowing for repeated vertices + */ + i += nr - (first - incr); + } + + //return TRUE; +} + + +static void fse_finish( struct draw_pt_front_end *frontend ) +{ +} + + +static void +fse_destroy( struct draw_pt_front_end *frontend ) +{ + FREE(frontend); +} + +struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw ) +{ + struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); + if (!fse) + return NULL; + + fse->base.prepare = fse_prepare; + fse->base.run = fse_run; + fse->base.finish = fse_finish; + fse->base.destroy = fse_destroy; + fse->draw = draw; + + fse->shader[0].run_linear = fetch_xyz_rgb_st; + fse->shader[0].key.nr_elements = 3; + fse->shader[0].key.output_stride = 12 * sizeof(float); + + fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[0].key.element[0].input_buffer = 0; + fse->shader[0].key.element[0].input_offset = 0; + fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[0].output_offset = 0; + + fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + 
fse->shader[0].key.element[1].input_buffer = 0; + fse->shader[0].key.element[1].input_offset = 0; + fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[1].output_offset = 16; + + fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32_FLOAT; + fse->shader[0].key.element[1].input_buffer = 0; + fse->shader[0].key.element[1].input_offset = 0; + fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[1].output_offset = 32; + + fse->shader[1].run_linear = fetch_xyz_rgb; + fse->shader[1].key.nr_elements = 2; + fse->shader[1].key.output_stride = 8 * sizeof(float); + + fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[1].key.element[0].input_buffer = 0; + fse->shader[1].key.element[0].input_offset = 0; + fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[1].key.element[0].output_offset = 0; + + fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[1].key.element[1].input_buffer = 0; + fse->shader[1].key.element[1].input_offset = 0; + fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[1].key.element[1].output_offset = 16; + + fse->shader[2].run_linear = fetch_xyz_rgb_psiz; + fse->shader[2].key.nr_elements = 3; + fse->shader[2].key.output_stride = 9 * sizeof(float); + + fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[2].key.element[0].input_buffer = 0; + fse->shader[2].key.element[0].input_offset = 0; + fse->shader[2].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[2].key.element[0].output_offset = 0; + + fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[2].key.element[1].input_buffer = 0; + fse->shader[2].key.element[1].input_offset = 0; + fse->shader[2].key.element[1].output_format = 
PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[2].key.element[1].output_offset = 16; + + /* psize is special + * -- effectively add it here as another input!?! + * -- who knows how to add it as a buffer? + */ + fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT; + fse->shader[2].key.element[2].input_buffer = 0; + fse->shader[2].key.element[2].input_offset = 0; + fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT; + fse->shader[2].key.element[2].output_offset = 32; + + fse->nr_shaders = 3; + + return &fse->base; +} -- cgit v1.2.3 From bbda45ec769120324f44febf00c6bb170f594f23 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 12 May 2008 19:40:20 +0100 Subject: draw: turn fse path into a middle end Also add some util functions in pt_util.c --- src/gallium/auxiliary/draw/Makefile | 2 + src/gallium/auxiliary/draw/draw_private.h | 2 +- src/gallium/auxiliary/draw/draw_pt.c | 34 +- src/gallium/auxiliary/draw/draw_pt.h | 9 +- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 39 +- src/gallium/auxiliary/draw/draw_pt_middle_fse.c | 705 +++++++++++++++++++++ src/gallium/auxiliary/draw/draw_pt_util.c | 103 +++ src/gallium/auxiliary/draw/draw_pt_varray.c | 45 -- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 2 +- .../auxiliary/draw/draw_pt_varray_tmp_linear.h | 9 +- src/gallium/auxiliary/draw/draw_pt_vcache.c | 15 +- 11 files changed, 847 insertions(+), 118 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_middle_fse.c create mode 100644 src/gallium/auxiliary/draw/draw_pt_util.c (limited to 'src/gallium/auxiliary/draw/draw_private.h') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 68e7744cc5..67d78bdbbd 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -27,8 +27,10 @@ C_SOURCES = \ draw_pt_fetch.c \ draw_pt_fetch_emit.c \ draw_pt_fetch_shade_emit.c \ + draw_pt_middle_fse.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_post_vs.c \ + 
draw_pt_util.c \ draw_pt_varray.c \ draw_pt_vcache.c \ draw_vertex.c \ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index cbe64cd290..86b901a3c8 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -124,7 +124,7 @@ struct draw_context struct { struct { struct draw_pt_middle_end *fetch_emit; - /*struct draw_pt_middle_end *fetch_shade_emit;*/ + struct draw_pt_middle_end *fetch_shade_emit; struct draw_pt_middle_end *general; } middle; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index d9e73a2396..91e35db819 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -75,6 +75,8 @@ draw_pt_arrays(struct draw_context *draw, if (opt == 0) middle = draw->pt.middle.fetch_emit; + else if (opt == PT_SHADE && draw->pt.test_fse) + middle = draw->pt.middle.fetch_shade_emit; else middle = draw->pt.middle.general; @@ -83,9 +85,11 @@ draw_pt_arrays(struct draw_context *draw, */ if (draw->pt.user.elts || (opt & PT_PIPELINE)) { frontend = draw->pt.front.vcache; +#if 0 } else if (opt == PT_SHADE && draw->pt.test_fse) { /* should be a middle end.. 
*/ frontend = draw->pt.front.fetch_shade_emit; +#endif } else { frontend = draw->pt.front.varray; } @@ -105,6 +109,8 @@ draw_pt_arrays(struct draw_context *draw, boolean draw_pt_init( struct draw_context *draw ) { + draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; + draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) return FALSE; @@ -117,8 +123,11 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_emit) return FALSE; - draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; if (draw->pt.test_fse) { + draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); + if (!draw->pt.middle.fetch_shade_emit) + return FALSE; + draw->pt.front.fetch_shade_emit = draw_pt_fetch_shade_emit( draw ); if (!draw->pt.front.fetch_shade_emit) return FALSE; @@ -145,6 +154,11 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.middle.fetch_emit = NULL; } + if (draw->pt.middle.fetch_shade_emit) { + draw->pt.middle.fetch_shade_emit->destroy( draw->pt.middle.fetch_shade_emit ); + draw->pt.middle.fetch_shade_emit = NULL; + } + if (draw->pt.front.fetch_shade_emit) { draw->pt.front.fetch_shade_emit->destroy( draw->pt.front.fetch_shade_emit ); draw->pt.front.fetch_shade_emit = NULL; @@ -163,19 +177,6 @@ void draw_pt_destroy( struct draw_context *draw ) -static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { - PIPE_PRIM_POINTS, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES -}; - /** * Draw vertex arrays @@ -188,9 +189,10 @@ void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - if (reduced_prim[prim] != draw->reduced_prim) { + unsigned reduced_prim = draw_pt_reduced_prim(prim); + if (reduced_prim != draw->reduced_prim) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->reduced_prim = reduced_prim[prim]; + draw->reduced_prim = reduced_prim; } /* 
drawing done here: */ diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index bcd89f6bd6..cdae46b8d2 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -134,7 +134,7 @@ struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw ); * vertex_elements. */ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); -//struct draw_pt_middle_end *draw_pt_fetch_shade_emit( struct draw_context *draw ); +struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); @@ -213,4 +213,11 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ); void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ); +/******************************************************************************* + * Utils: + */ +void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr); +unsigned draw_pt_reduced_prim(unsigned prim); + + #endif diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 9e1d1add36..f756d3e0bb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -434,43 +434,6 @@ static void fse_prepare( struct draw_pt_front_end *fe, } -static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) -{ - switch (prim) { - case PIPE_PRIM_POINTS: - *first = 1; - *incr = 1; - return TRUE; - case PIPE_PRIM_LINES: - *first = 2; - *incr = 2; - return TRUE; - case PIPE_PRIM_LINE_STRIP: - *first = 2; - *incr = 1; - return TRUE; - case PIPE_PRIM_TRIANGLES: - *first = 3; - *incr = 3; - return TRUE; - case PIPE_PRIM_TRIANGLE_STRIP: - *first = 3; - *incr = 1; - return TRUE; - case PIPE_PRIM_QUADS: - *first = 4; - *incr = 4; - return TRUE; - case PIPE_PRIM_QUAD_STRIP: - *first = 4; - *incr = 2; - return 
TRUE; - default: - *first = 0; - *incr = 1; /* set to one so that count % incr works */ - return FALSE; - } -} @@ -596,7 +559,7 @@ fse_run(struct draw_pt_front_end *fe, //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count); - split_prim_inplace(fse->prim, &first, &incr); + draw_pt_split_prim(fse->prim, &first, &incr); count -= (count - first) % incr; diff --git a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c new file mode 100644 index 0000000000..cdb7d260da --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c @@ -0,0 +1,705 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" + +#include "translate/translate.h" + +struct fetch_shade_emit; + +struct fse_shader { + struct translate_key key; + + void (*run_linear)( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ); + + void (*run_elts)( const struct fetch_shade_emit *fse, + const unsigned *fetch_elts, + unsigned fetch_count, + char *buffer ); + +}; + +/* Prototype fetch, shade, emit-hw-verts all in one go. + */ +struct fetch_shade_emit { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct translate_key key; + + /* Temporaries: + */ + const float *constants; + unsigned pitch[PIPE_MAX_ATTRIBS]; + const ubyte *src[PIPE_MAX_ATTRIBS]; + unsigned prim; + + /* Points to one of the three hardwired example shaders, below: + */ + struct fse_shader *active; + + /* Temporary: A list of hard-wired shaders. Of course the plan + * would be to generate these for a given (vertex-shader, + * translate-key) pair... + */ + struct fse_shader shader[10]; + int nr_shaders; +}; + + + +/* Not quite passthrough yet -- we're still running the 'shader' here, + * inlined into the vertex fetch function. 
+ */ +static void shader0_run_linear( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + + const float *m = fse->constants; + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; + const ubyte *st = fse->src[2] + start * fse->pitch[2]; + + float *out = (float *)buffer; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + xyz += fse->pitch[0]; + } + + { + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + rgb += fse->pitch[1]; + } + + { + const float *in = (const float *)st; + out[8] = in[0]; + out[9] = in[1]; + out[10] = 0.0f; + out[11] = 1.0f; + st += fse->pitch[2]; + } + + out += 12; + } +} + + + +static void shader1_run_linear( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + const float *m = (const float *)fse->constants; + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; + float *out = (float *)buffer; + +// debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]); + + + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + xyz += fse->pitch[0]; + } + + { + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = 
in[1]; + out[6] = in[2]; + out[7] = 1.0f; + rgb += fse->pitch[1]; + } + + out += 8; + } +} + + + + +static void shader2_run_linear( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + const float *m = (const float *)fse->constants; + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; + const float psiz = 1.0; + float *out = (float *)buffer; + + + assert(fse->pitch[1] == 0); + + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + xyz += fse->pitch[0]; + } + + { + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + rgb += fse->pitch[1]; + } + + { + out[8] = psiz; + } + + out += 9; + } +} + + + + +static void shader0_run_elts( const struct fetch_shade_emit *fse, + const unsigned *elts, + unsigned count, + char *buffer ) +{ + unsigned i; + const float *m = fse->constants; + float *out = (float *)buffer; + + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + unsigned elt = elts[i]; + { + const ubyte *xyz = fse->src[0] + elt * fse->pitch[0]; + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + } + + { + const ubyte *rgb = fse->src[1] + elt * fse->pitch[1]; + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + } + + { + const ubyte *st = fse->src[2] + 
elt * fse->pitch[2]; + const float *in = (const float *)st; + out[8] = in[0]; + out[9] = in[1]; + out[10] = 0.0f; + out[11] = 1.0f; + } + + out += 12; + } +} + + + +/* Indexed variant of shader1_run_linear: for each of the 'count' + * indices in 'elts', transform the xyz attribute (src[0]) by the + * matrix in fse->constants (m[12..15] are added, i.e. w is taken as 1), + * copy the rgb attribute (src[1]) with alpha forced to 1.0f, and write + * 8 floats per vertex to 'buffer'. + */ +static void shader1_run_elts( const struct fetch_shade_emit *fse, + const unsigned *elts, + unsigned count, + char *buffer ) +{ + unsigned i; + const float *m = (const float *)fse->constants; + float *out = (float *)buffer; + + for (i = 0; i < count; i++) { + unsigned elt = elts[i]; + + { + /* Source pointers are recomputed from 'elt' on every iteration, + * so there is nothing to advance at the end of each block. + */ + const ubyte *xyz = fse->src[0] + elt * fse->pitch[0]; + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + } + + { + const ubyte *rgb = fse->src[1] + elt * fse->pitch[1]; + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + } + + out += 8; + } +} + + + + +/* Indexed variant of shader2_run_linear: same position transform and + * rgb passthrough as above, followed by a constant point size of 1.0, + * for 9 floats per vertex written to 'buffer'. + */ +static void shader2_run_elts( const struct fetch_shade_emit *fse, + const unsigned *elts, + unsigned count, + char *buffer ) +{ + unsigned i; + const float *m = (const float *)fse->constants; + const float psiz = 1.0; + float *out = (float *)buffer; + + for (i = 0; i < count; i++) { + unsigned elt = elts[i]; + { + const ubyte *xyz = fse->src[0] + elt * fse->pitch[0]; + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + } + + { + /* Read the colour as three floats, as shader2_run_linear does; + * indexing the ubyte pointer directly would emit raw byte values. + */ + const ubyte *rgb = fse->src[1] + elt * fse->pitch[1]; + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + } + + { + out[8] = psiz; + } + + out += 9; + } +} + + + +static void fse_prepare( struct 
draw_pt_middle_end *middle, + unsigned prim, + unsigned opt ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs; + unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs; + const struct vertex_info *vinfo; + unsigned i; + boolean need_psize = 0; + + + if (draw->pt.user.elts) { + assert(0); + return ; + } + + if (!draw->render->set_primitive( draw->render, + prim )) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + + + fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ + num_vs_inputs); /* inputs - fetch from api format */ + + fse->key.output_stride = vinfo->size * 4; + memset(fse->key.element, 0, + fse->key.nr_elements * sizeof(fse->key.element[0])); + + for (i = 0; i < num_vs_inputs; i++) { + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].input_format = src->src_format; + + /* Consider ignoring these at this point, ie make generated + * programs independent of this state: + */ + fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index; + fse->key.element[i].input_offset = 0; //src->src_offset; + } + + + { + unsigned dst_offset = 0; + + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned output_format = PIPE_FORMAT_NONE; + unsigned vs_output = vinfo->src_index[i]; + + switch (vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + need_psize = 1; + 
output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + vs_output = num_vs_outputs + 1; /* NOTE(review): fse_prepare sizes the key as MAX2(num_vs_outputs, num_vs_inputs) elements, so this slot can lie past key.nr_elements -- confirm the intended psize slot */ + + break; + default: + assert(0); + break; + } + + /* The elements in the key correspond to vertex shader output + * numbers, not to positions in the hw vertex description -- + * that's handled by the output_offset field. + */ + fse->key.element[vs_output].output_format = output_format; + fse->key.element[vs_output].output_offset = dst_offset; + + dst_offset += emit_sz; + assert(fse->key.output_stride >= dst_offset); + } + } + + /* To make psize work, really need to tell the vertex shader to + * copy that value from input->output. For 'translate' this was + * implicit for all elements. + */ +#if 0 + if (need_psize) { + unsigned input = num_vs_inputs + 1; + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; + fse->key.element[i].input_buffer = 0; //nr_buffers + 1; + fse->key.element[i].input_offset = 0; + + fse->key.nr_elements += 1; + + } +#endif + + fse->constants = draw->pt.user.constants; + + /* Would normally look up a vertex shader and peruse its list of + * variants somehow. We omitted that step and put all the + * hardcoded "shaders" into an array. We're just making the + * assumption that this happens to be a matching shader... ie + * you're running isosurf, aren't you? 
+ */ + fse->active = NULL; + for (i = 0; i < fse->nr_shaders; i++) { + if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0) + fse->active = &fse->shader[i]; + } + + if (!fse->active) { + assert(0); + return ; + } + + /* Now set buffer pointers: + */ + for (i = 0; i < num_vs_inputs; i++) { + unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; + + fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] + + draw->pt.vertex_buffer[buf].buffer_offset + + draw->pt.vertex_element[i].src_offset); + + fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch; + + } + + + //return TRUE; +} + + + + + + + +static void fse_run_linear( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + + char *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)count ); + + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping and viewport transformation are done elsewhere -- + * either by the API or on hardware, or for some other reason not + * required... + */ + fse->active->run_linear( fse, + start, count, + hw_verts ); + + /* Draw arrays path to avoid re-emitting index list again and + * again. 
+ */ + draw->render->draw_arrays( draw->render, + 0, + count ); + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + count ); +} + + +static void +fse_run(struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)fetch_count ); + if (!hw_verts) { + assert(0); + return; + } + + + /* Single routine to fetch vertices, run shader and emit HW verts. + */ + fse->active->run_elts( fse, + fetch_elts, + fetch_count, + hw_verts ); + + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + fetch_count ); + +} + + +static void fse_finish( struct draw_pt_middle_end *middle ) +{ +} + + +static void +fse_destroy( struct draw_pt_middle_end *middle ) +{ + FREE(middle); +} + +struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ) +{ + struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); + if (!fse) + return NULL; + + fse->base.prepare = fse_prepare; + fse->base.run = fse_run; + fse->base.run_linear = fse_run_linear; + fse->base.finish = fse_finish; + fse->base.destroy = fse_destroy; + fse->draw = draw; + + fse->shader[0].run_linear = shader0_run_linear; + fse->shader[0].run_elts = shader0_run_elts; + fse->shader[0].key.nr_elements = 3; + fse->shader[0].key.output_stride = 12 * sizeof(float); + + fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[0].key.element[0].input_buffer = 0; + fse->shader[0].key.element[0].input_offset = 0; + 
fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[0].output_offset = 0; + + fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[0].key.element[1].input_buffer = 0; + fse->shader[0].key.element[1].input_offset = 0; + fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[1].output_offset = 16; + + /* Third attribute of shader 0 (the two-float input that + * shader0_run_* reads from src[2]); it must occupy element[2] so + * that all three of the key's nr_elements entries are described + * and the second entry above is not overwritten. + */ + fse->shader[0].key.element[2].input_format = PIPE_FORMAT_R32G32_FLOAT; + fse->shader[0].key.element[2].input_buffer = 0; + fse->shader[0].key.element[2].input_offset = 0; + fse->shader[0].key.element[2].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[2].output_offset = 32; + + /* Shader 1: two attributes, 8 floats per emitted vertex. + */ + fse->shader[1].run_linear = shader1_run_linear; + fse->shader[1].run_elts = shader1_run_elts; + fse->shader[1].key.nr_elements = 2; + fse->shader[1].key.output_stride = 8 * sizeof(float); + + fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[1].key.element[0].input_buffer = 0; + fse->shader[1].key.element[0].input_offset = 0; + fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[1].key.element[0].output_offset = 0; + + fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[1].key.element[1].input_buffer = 0; + fse->shader[1].key.element[1].input_offset = 0; + fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[1].key.element[1].output_offset = 16; + + /* Shader 2: two attributes plus a point-size slot, 9 floats per + * emitted vertex. + */ + fse->shader[2].run_linear = shader2_run_linear; + fse->shader[2].run_elts = shader2_run_elts; + fse->shader[2].key.nr_elements = 3; + fse->shader[2].key.output_stride = 9 * sizeof(float); + + fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[2].key.element[0].input_buffer = 0; + fse->shader[2].key.element[0].input_offset = 0; + fse->shader[2].key.element[0].output_format = 
PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[2].key.element[0].output_offset = 0; + + fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[2].key.element[1].input_buffer = 0; + fse->shader[2].key.element[1].input_offset = 0; + fse->shader[2].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[2].key.element[1].output_offset = 16; + + /* psize is special + * -- effectively add it here as another input!?! + * -- who knows how to add it as a buffer? + */ + fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT; + fse->shader[2].key.element[2].input_buffer = 0; + fse->shader[2].key.element[2].input_offset = 0; + fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT; + fse->shader[2].key.element[2].output_offset = 32; + + fse->nr_shaders = 3; + + return &fse->base; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c new file mode 100644 index 0000000000..32c8a9632c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -0,0 +1,103 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_pt.h" + +/* Report the vertex-count pattern of primitive type 'prim' through the two out-parameters: the values written are *first/*incr = 1/1 for points, 2/2 for lines, 2/1 for line strips and loops, 3/3 for triangles, 3/1 for triangle strips, fans and polygons, 4/4 for quads and 4/2 for quad strips. Presumably *first is the vertex count of the first primitive and *incr the count each further primitive adds -- callers trim draw lengths with these values. Any other type hits assert(0) and yields 0/1. */ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + *first = 1; + *incr = 1; + break; + case PIPE_PRIM_LINES: + *first = 2; + *incr = 2; + break; + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + *first = 2; + *incr = 1; + break; + case PIPE_PRIM_TRIANGLES: + *first = 3; + *incr = 3; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + *first = 3; + *incr = 1; + break; + case PIPE_PRIM_QUADS: + *first = 4; + *incr = 4; + break; + case PIPE_PRIM_QUAD_STRIP: + *first = 4; + *incr = 2; + break; + default: + assert(0); + *first = 0; + *incr = 1; /* set to one so that count % incr works */ + break; + } +} + + +/* Map primitive type 'prim' to its basic class: points stay points; lines, line strips and line loops return PIPE_PRIM_LINES; triangles, strips, fans, polygons, quads and quad strips return PIPE_PRIM_TRIANGLES. Any other value hits assert(0) and returns PIPE_PRIM_POINTS. */ unsigned draw_pt_reduced_prim(unsigned prim) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + return PIPE_PRIM_POINTS; + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + return PIPE_PRIM_LINES; + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + return PIPE_PRIM_TRIANGLES; + default: + assert(0); + return PIPE_PRIM_POINTS; + } +} + + + diff --git 
a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index d92ad4fda1..af6e2d5157 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -106,51 +106,6 @@ static INLINE void fetch_init(struct varray_frontend *varray, } -static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) -{ - switch (prim) { - case PIPE_PRIM_POINTS: - *first = 1; - *incr = 1; - return TRUE; - case PIPE_PRIM_LINES: - *first = 2; - *incr = 2; - return TRUE; - case PIPE_PRIM_LINE_STRIP: - *first = 2; - *incr = 1; - return TRUE; - case PIPE_PRIM_TRIANGLES: - *first = 3; - *incr = 3; - return TRUE; - case PIPE_PRIM_TRIANGLE_STRIP: - *first = 3; - *incr = 1; - return TRUE; - case PIPE_PRIM_TRIANGLE_FAN: - *first = 3; - *incr = 1; - return TRUE; - case PIPE_PRIM_QUADS: - *first = 4; - *incr = 4; - return TRUE; - case PIPE_PRIM_QUAD_STRIP: - *first = 4; - *incr = 2; - return TRUE; - case PIPE_PRIM_POLYGON: - *first = 3; - *incr = 1; - return TRUE; - default: - *first = 0; - *incr = 1; /* set to one so that count % incr works */ - return FALSE; - } -} static INLINE void add_draw_el(struct varray_frontend *varray, diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index 1395275897..6979f6b544 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -15,7 +15,7 @@ static void FUNC(struct draw_pt_front_end *frontend, varray->fetch_start = start; - split_prim_inplace(varray->input_prim, &first, &incr); + draw_pt_split_prim(varray->input_prim, &first, &incr); #if 0 debug_printf("%s (%d) %d/%d\n", __FUNCTION__, diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index b6f1f0cadc..114ed371a0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ 
b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -16,7 +16,13 @@ static void FUNC(struct draw_pt_front_end *frontend, varray->fetch_start = start; - split_prim_inplace(varray->input_prim, &first, &incr); + draw_pt_split_prim(varray->input_prim, &first, &incr); + + /* Sanitize primitive length: + */ + count = trim(count, first, incr); + if (count < first) + return; #if 0 debug_printf("%s (%d) %d/%d\n", __FUNCTION__, @@ -32,7 +38,6 @@ static void FUNC(struct draw_pt_front_end *frontend, case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_QUADS: case PIPE_PRIM_QUAD_STRIP: - for (j = 0; j < count;) { unsigned remaining = count - j; unsigned nr = trim( MIN2(FETCH_MAX, remaining), first, incr ); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 6b3fb1406b..a3495f2a30 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -204,19 +204,6 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, -static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { - PIPE_PRIM_POINTS, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES -}; - static void vcache_prepare( struct draw_pt_front_end *frontend, @@ -236,7 +223,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, } vcache->input_prim = prim; - vcache->output_prim = reduced_prim[prim]; + vcache->output_prim = draw_pt_reduced_prim(prim); vcache->middle = middle; middle->prepare( middle, vcache->output_prim, opt ); -- cgit v1.2.3 From b23706454bb165a62888d264e95a98a2e4cf139c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 13 May 2008 13:35:14 +0100 Subject: draw: get rid of fetch-shade-emit frontend hack The code is now living in its intended place as a pt middle end. 
--- src/gallium/auxiliary/draw/Makefile | 1 - src/gallium/auxiliary/draw/draw_private.h | 1 - src/gallium/auxiliary/draw/draw_pt.c | 14 - src/gallium/auxiliary/draw/draw_pt.h | 1 - .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 677 --------------------- 5 files changed, 694 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c (limited to 'src/gallium/auxiliary/draw/draw_private.h') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 67d78bdbbd..3053682da8 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -26,7 +26,6 @@ C_SOURCES = \ draw_pt_emit.c \ draw_pt_fetch.c \ draw_pt_fetch_emit.c \ - draw_pt_fetch_shade_emit.c \ draw_pt_middle_fse.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_post_vs.c \ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 86b901a3c8..fd51a57781 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -131,7 +131,6 @@ struct draw_context struct { struct draw_pt_front_end *vcache; struct draw_pt_front_end *varray; - struct draw_pt_front_end *fetch_shade_emit; /* temp hack */ } front; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 91e35db819..75f44d503e 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -85,11 +85,6 @@ draw_pt_arrays(struct draw_context *draw, */ if (draw->pt.user.elts || (opt & PT_PIPELINE)) { frontend = draw->pt.front.vcache; -#if 0 - } else if (opt == PT_SHADE && draw->pt.test_fse) { - /* should be a middle end.. 
*/ - frontend = draw->pt.front.fetch_shade_emit; -#endif } else { frontend = draw->pt.front.varray; } @@ -127,10 +122,6 @@ boolean draw_pt_init( struct draw_context *draw ) draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); if (!draw->pt.middle.fetch_shade_emit) return FALSE; - - draw->pt.front.fetch_shade_emit = draw_pt_fetch_shade_emit( draw ); - if (!draw->pt.front.fetch_shade_emit) - return FALSE; } @@ -159,11 +150,6 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.middle.fetch_shade_emit = NULL; } - if (draw->pt.front.fetch_shade_emit) { - draw->pt.front.fetch_shade_emit->destroy( draw->pt.front.fetch_shade_emit ); - draw->pt.front.fetch_shade_emit = NULL; - } - if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); draw->pt.front.vcache = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index cdae46b8d2..e03816ebbc 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -121,7 +121,6 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); -struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw ); /* Middle-ends: * diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c deleted file mode 100644 index f756d3e0bb..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ /dev/null @@ -1,677 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vbuf.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pt.h" -#include "draw/draw_vs.h" - -#include "translate/translate.h" - -struct fetch_shade_emit; - -struct fse_shader { - struct translate_key key; - - void (*run_linear)( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ); -}; - -/* Prototype fetch, shade, emit-hw-verts all in one go. 
- */ -struct fetch_shade_emit { - struct draw_pt_front_end base; - - struct draw_context *draw; - - struct translate_key key; - - /* Temporaries: - */ - const float *constants; - unsigned pitch[PIPE_MAX_ATTRIBS]; - const ubyte *src[PIPE_MAX_ATTRIBS]; - unsigned prim; - - /* Points to one of the three hardwired example shaders, below: - */ - struct fse_shader *active; - - /* Temporary: A list of hard-wired shaders. Of course the plan - * would be to generate these for a given (vertex-shader, - * translate-key) pair... - */ - struct fse_shader shader[10]; - int nr_shaders; -}; - - - -/* Not quite passthrough yet -- we're still running the 'shader' here, - * inlined into the vertex fetch function. - */ -static void fetch_xyz_rgb_st( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ) -{ - unsigned i; - - const float *m = fse->constants; - const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; - const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; - const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; - const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; - - const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; - const ubyte *st = fse->src[2] + start * fse->pitch[2]; - - float *out = (float *)buffer; - - - assert(fse->pitch[1] == 0); - - /* loop over vertex attributes (vertex shader inputs) - */ - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m0 * ix + m4 * iy + m8 * iz + m12; - out[1] = m1 * ix + m5 * iy + m9 * iz + m13; - out[2] = m2 * ix + m6 * iy + m10 * iz + m14; - out[3] = m3 * ix + m7 * iy + m11 * iz + m15; - xyz += fse->pitch[0]; - } - - { - out[4] = 1.0f; - out[5] = 1.0f; - out[6] = 1.0f; - out[7] = 1.0f; - } - - { - const float *in = (const float *)st; st += fse->pitch[2]; - out[8] = in[0]; - out[9] = in[1]; - out[10] = 0.0f; - out[11] = 1.0f; - } - - out += 12; - } -} - - - -static void fetch_xyz_rgb( const 
struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ) -{ - unsigned i; - - const float *m = (const float *)fse->constants; - const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; - const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; - const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; - const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; - - const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; - const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; - - float *out = (float *)buffer; - -// debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]); - - - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m0 * ix + m4 * iy + m8 * iz + m12; - out[1] = m1 * ix + m5 * iy + m9 * iz + m13; - out[2] = m2 * ix + m6 * iy + m10 * iz + m14; - out[3] = m3 * ix + m7 * iy + m11 * iz + m15; - xyz += fse->pitch[0]; - } - - { - const float *in = (const float *)rgb; - out[4] = in[0]; - out[5] = in[1]; - out[6] = in[2]; - out[7] = 1.0f; - rgb += fse->pitch[1]; - } - - out += 8; - } -} - - - - -static void fetch_xyz_rgb_psiz( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ) -{ - unsigned i; - - const float *m = (const float *)fse->constants; - const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; - const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; - const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; - const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; - - const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; - const float *rgb = (const float *)(fse->src[1] + start * fse->pitch[1]); - const float psiz = 1.0; - - float *out = (float *)buffer; - - - assert(fse->pitch[1] == 0); - - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m0 * ix + m4 * iy + m8 * iz + m12; - out[1] = m1 * ix + m5 * iy 
+ m9 * iz + m13; - out[2] = m2 * ix + m6 * iy + m10 * iz + m14; - out[3] = m3 * ix + m7 * iy + m11 * iz + m15; - xyz += fse->pitch[0]; - } - - { - out[4] = rgb[0]; - out[5] = rgb[1]; - out[6] = rgb[2]; - out[7] = 1.0f; - } - - { - out[8] = psiz; - } - - out += 9; - } -} - - - - -static boolean set_prim( struct fetch_shade_emit *fse, - unsigned prim, - unsigned count ) -{ - struct draw_context *draw = fse->draw; - - fse->prim = prim; - - switch (prim) { - case PIPE_PRIM_LINE_LOOP: - if (count > 1024) - return FALSE; - draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP ); - break; - - case PIPE_PRIM_TRIANGLE_FAN: - case PIPE_PRIM_POLYGON: - if (count > 1024) - return FALSE; - draw->render->set_primitive( draw->render, prim ); - break; - - case PIPE_PRIM_QUADS: - case PIPE_PRIM_QUAD_STRIP: - draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES ); - break; - - default: - draw->render->set_primitive( draw->render, prim ); - break; - } - - return TRUE; -} - - - - - - -static void fse_prepare( struct draw_pt_front_end *fe, - unsigned prim, - struct draw_pt_middle_end *unused, - unsigned opt ) -{ - struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe; - struct draw_context *draw = fse->draw; - unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs; - unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs; - const struct vertex_info *vinfo; - unsigned i; - boolean need_psize = 0; - - - if (draw->pt.user.elts) { - assert(0); - return ; - } - - if (!set_prim(fse, prim, /*count*/1022 )) { - assert(0); - return ; - } - - /* Must do this after set_primitive() above: - */ - vinfo = draw->render->get_vertex_info(draw->render); - - - - fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ - num_vs_inputs); /* inputs - fetch from api format */ - - fse->key.output_stride = vinfo->size * 4; - memset(fse->key.element, 0, - fse->key.nr_elements * sizeof(fse->key.element[0])); - - for (i = 0; i < num_vs_inputs; 
i++) { - const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; - fse->key.element[i].input_format = src->src_format; - - /* Consider ignoring these at this point, ie make generated - * programs independent of this state: - */ - fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index; - fse->key.element[i].input_offset = 0; //src->src_offset; - } - - - { - unsigned dst_offset = 0; - - for (i = 0; i < vinfo->num_attribs; i++) { - unsigned emit_sz = 0; - unsigned output_format = PIPE_FORMAT_NONE; - unsigned vs_output = vinfo->src_index[i]; - - switch (vinfo->emit[i]) { - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - need_psize = 1; - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - vs_output = num_vs_outputs + 1; - - break; - default: - assert(0); - break; - } - - /* The elements in the key correspond to vertex shader output - * numbers, not to positions in the hw vertex description -- - * that's handled by the output_offset field. - */ - fse->key.element[vs_output].output_format = output_format; - fse->key.element[vs_output].output_offset = dst_offset; - - dst_offset += emit_sz; - assert(fse->key.output_stride >= dst_offset); - } - } - - /* To make psize work, really need to tell the vertex shader to - * copy that value from input->output. For 'translate' this was - * implicit for all elements. 
- */ -#if 0 - if (need_psize) { - unsigned input = num_vs_inputs + 1; - const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; - fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; - fse->key.element[i].input_buffer = 0; //nr_buffers + 1; - fse->key.element[i].input_offset = 0; - - fse->key.nr_elements += 1; - - } -#endif - - fse->constants = draw->pt.user.constants; - - /* Would normally look up a vertex shader and peruse its list of - * varients somehow. We omitted that step and put all the - * hardcoded "shaders" into an array. We're just making the - * assumption that this happens to be a matching shader... ie - * you're running isosurf, aren't you? - */ - fse->active = NULL; - for (i = 0; i < fse->nr_shaders; i++) { - if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0) - fse->active = &fse->shader[i]; - } - - if (!fse->active) { - assert(0); - return ; - } - - /* Now set buffer pointers: - */ - for (i = 0; i < num_vs_inputs; i++) { - unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; - - fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] + - draw->pt.vertex_buffer[buf].buffer_offset + - draw->pt.vertex_element[i].src_offset); - - fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch; - - } - - - //return TRUE; -} - - - - - - -#define INDEX(i) (start + (i)) -static void fse_render_linear( struct vbuf_render *render, - unsigned prim, - unsigned start, - unsigned length ) -{ - ushort *tmp = NULL; - unsigned i, j; - - switch (prim) { - case PIPE_PRIM_LINE_LOOP: - tmp = MALLOC( sizeof(ushort) * (length + 1) ); - - for (i = 0; i < length; i++) - tmp[i] = INDEX(i); - tmp[length] = 0; - - render->draw( render, - tmp, - length+1 ); - break; - - - case PIPE_PRIM_QUAD_STRIP: - tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) ); - - for (j = i = 0; i + 3 < length; i += 2, j += 6) { - tmp[j+0] = INDEX(i+0); - tmp[j+1] = INDEX(i+1); - tmp[j+2] = INDEX(i+3); - - tmp[j+3] = INDEX(i+2); - tmp[j+4] = INDEX(i+0); - tmp[j+5] = 
INDEX(i+3); - } - - if (j) - render->draw( render, tmp, j ); - break; - - case PIPE_PRIM_QUADS: - tmp = MALLOC( sizeof(int) * (length / 4 * 6) ); - - for (j = i = 0; i + 3 < length; i += 4, j += 6) { - tmp[j+0] = INDEX(i+0); - tmp[j+1] = INDEX(i+1); - tmp[j+2] = INDEX(i+3); - - tmp[j+3] = INDEX(i+1); - tmp[j+4] = INDEX(i+2); - tmp[j+5] = INDEX(i+3); - } - - if (j) - render->draw( render, tmp, j ); - break; - - default: - render->draw_arrays( render, - start, - length ); - break; - } - - if (tmp) - FREE(tmp); -} - - - -static boolean do_draw( struct fetch_shade_emit *fse, - unsigned start, unsigned count ) -{ - struct draw_context *draw = fse->draw; - - char *hw_verts = - draw->render->allocate_vertices( draw->render, - (ushort)fse->key.output_stride, - (ushort)count ); - - if (!hw_verts) - return FALSE; - - /* Single routine to fetch vertices, run shader and emit HW verts. - * Clipping and viewport transformation are done on hardware. - */ - fse->active->run_linear( fse, - start, count, - hw_verts ); - - /* Draw arrays path to avoid re-emitting index list again and - * again. 
- */ - fse_render_linear( draw->render, - fse->prim, - 0, - count ); - - - draw->render->release_vertices( draw->render, - hw_verts, - fse->key.output_stride, - count ); - - return TRUE; -} - - -static void -fse_run(struct draw_pt_front_end *fe, - pt_elt_func elt_func, - const void *elt_ptr, - unsigned count) -{ - struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe; - unsigned i = 0; - unsigned first, incr; - unsigned start = elt_func(elt_ptr, 0); - - //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count); - - draw_pt_split_prim(fse->prim, &first, &incr); - - count -= (count - first) % incr; - - while (i + first <= count) { - int nr = MIN2( count - i, 1024 ); - - /* snap to prim boundary - */ - nr -= (nr - first) % incr; - - if (!do_draw( fse, start + i, nr )) { - assert(0); - return ; - } - - /* increment allowing for repeated vertices - */ - i += nr - (first - incr); - } - - //return TRUE; -} - - -static void fse_finish( struct draw_pt_front_end *frontend ) -{ -} - - -static void -fse_destroy( struct draw_pt_front_end *frontend ) -{ - FREE(frontend); -} - -struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw ) -{ - struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); - if (!fse) - return NULL; - - fse->base.prepare = fse_prepare; - fse->base.run = fse_run; - fse->base.finish = fse_finish; - fse->base.destroy = fse_destroy; - fse->draw = draw; - - fse->shader[0].run_linear = fetch_xyz_rgb_st; - fse->shader[0].key.nr_elements = 3; - fse->shader[0].key.output_stride = 12 * sizeof(float); - - fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[0].key.element[0].input_buffer = 0; - fse->shader[0].key.element[0].input_offset = 0; - fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[0].key.element[0].output_offset = 0; - - fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - 
fse->shader[0].key.element[1].input_buffer = 0; - fse->shader[0].key.element[1].input_offset = 0; - fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[0].key.element[1].output_offset = 16; - - fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32_FLOAT; - fse->shader[0].key.element[1].input_buffer = 0; - fse->shader[0].key.element[1].input_offset = 0; - fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[0].key.element[1].output_offset = 32; - - fse->shader[1].run_linear = fetch_xyz_rgb; - fse->shader[1].key.nr_elements = 2; - fse->shader[1].key.output_stride = 8 * sizeof(float); - - fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[1].key.element[0].input_buffer = 0; - fse->shader[1].key.element[0].input_offset = 0; - fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[1].key.element[0].output_offset = 0; - - fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[1].key.element[1].input_buffer = 0; - fse->shader[1].key.element[1].input_offset = 0; - fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[1].key.element[1].output_offset = 16; - - fse->shader[2].run_linear = fetch_xyz_rgb_psiz; - fse->shader[2].key.nr_elements = 3; - fse->shader[2].key.output_stride = 9 * sizeof(float); - - fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[2].key.element[0].input_buffer = 0; - fse->shader[2].key.element[0].input_offset = 0; - fse->shader[2].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[2].key.element[0].output_offset = 0; - - fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[2].key.element[1].input_buffer = 0; - fse->shader[2].key.element[1].input_offset = 0; - fse->shader[2].key.element[1].output_format = 
PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[2].key.element[1].output_offset = 16; - - /* psize is special - * -- effectively add it here as another input!?! - * -- who knows how to add it as a buffer? - */ - fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT; - fse->shader[2].key.element[2].input_buffer = 0; - fse->shader[2].key.element[2].input_offset = 0; - fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT; - fse->shader[2].key.element[2].output_offset = 32; - - fse->nr_shaders = 3; - - return &fse->base; -} -- cgit v1.2.3 From 2f0d1396e4c1626b3b1ac799bd29e86a9530369e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 13 May 2008 13:40:22 +0100 Subject: draw: move some state into a new 'vs' area --- src/gallium/auxiliary/draw/draw_context.c | 21 ++++--------- src/gallium/auxiliary/draw/draw_pipe.h | 2 +- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 4 +-- src/gallium/auxiliary/draw/draw_pipe_clip.c | 6 ++-- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_twoside.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 4 +-- src/gallium/auxiliary/draw/draw_private.h | 28 +++++++++++------ .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 6 ++-- src/gallium/auxiliary/draw/draw_pt_middle_fse.c | 4 +-- src/gallium/auxiliary/draw/draw_vs.c | 35 +++++++++++++++++++--- src/gallium/auxiliary/draw/draw_vs_exec.c | 2 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- 15 files changed, 75 insertions(+), 47 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_private.h') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 98e23fa830..2242074965 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -56,12 +56,6 @@ struct draw_context *draw_create( void ) 
draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ - tgsi_exec_machine_init(&draw->machine); - - /* FIXME: give this machine thing a proper constructor: - */ - draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); - draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); if (!draw_pipeline_init( draw )) goto fail; @@ -69,6 +63,9 @@ struct draw_context *draw_create( void ) if (!draw_pt_init( draw )) goto fail; + if (!draw_vs_init( draw )) + goto fail; + return draw; fail: @@ -83,13 +80,6 @@ void draw_destroy( struct draw_context *draw ) return; - if (draw->machine.Inputs) - align_free(draw->machine.Inputs); - - if (draw->machine.Outputs) - align_free(draw->machine.Outputs); - - tgsi_exec_machine_free_data(&draw->machine); /* Not so fast -- we're just borrowing this at the moment. * @@ -99,6 +89,7 @@ void draw_destroy( struct draw_context *draw ) draw_pipeline_destroy( draw ); draw_pt_destroy( draw ); + draw_vs_destroy( draw ); FREE( draw ); } @@ -295,7 +286,7 @@ int draw_find_vs_output(struct draw_context *draw, uint semantic_name, uint semantic_index) { - const struct draw_vertex_shader *vs = draw->vertex_shader; + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; uint i; for (i = 0; i < vs->info.num_outputs; i++) { if (vs->info.output_semantic_name[i] == semantic_name && @@ -320,7 +311,7 @@ draw_find_vs_output(struct draw_context *draw, uint draw_num_vs_outputs(struct draw_context *draw) { - uint count = draw->vertex_shader->info.num_outputs; + uint count = draw->vs.vertex_shader->info.num_outputs; if (draw->extra_vp_outputs.slot > 0) count++; return count; diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h index f1cb0891ca..dbad8f98ac 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.h +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -116,7 +116,7 @@ dup_vert( struct draw_stage *stage, { struct vertex_header *tmp = stage->tmp[idx]; 
const uint vsize = sizeof(struct vertex_header) - + stage->draw->num_vs_outputs * 4 * sizeof(float); + + stage->draw->vs.num_vs_outputs * 4 * sizeof(float); memcpy(tmp, vert, vsize); tmp->vertex_id = UNDEFINED_VERTEX_ID; return tmp; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index f501b2aed4..d93708ad3c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -651,7 +651,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) } /* update vertex attrib info */ - aaline->tex_slot = draw->num_vs_outputs; + aaline->tex_slot = draw->vs.num_vs_outputs; assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ /* advertise the extra post-transformed vertex attribute */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 122a48660a..97d74ad693 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -681,7 +681,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) bind_aapoint_fragment_shader(aapoint); /* update vertex attrib info */ - aapoint->tex_slot = draw->num_vs_outputs; + aapoint->tex_slot = draw->vs.num_vs_outputs; assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; @@ -692,7 +692,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) aapoint->psize_slot = -1; if (draw->rasterizer->point_size_per_vertex) { /* find PSIZ vertex output */ - const struct draw_vertex_shader *vs = draw->vertex_shader; + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; uint i; for (i = 0; i < vs->info.num_outputs; i++) { if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 
ce80c94163..c11ed934a4 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -112,7 +112,7 @@ static void interp( const struct clipper *clip, const struct vertex_header *out, const struct vertex_header *in ) { - const unsigned nr_attrs = clip->stage.draw->num_vs_outputs; + const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs; unsigned j; /* Vertex header. @@ -180,7 +180,7 @@ static void emit_poly( struct draw_stage *stage, header.flags |= edge_last; if (0) { - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; uint j, k; debug_printf("Clipped tri:\n"); for (j = 0; j < 3; j++) { @@ -425,7 +425,7 @@ clip_init_state( struct draw_stage *stage ) clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; if (clipper->flat) { - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; uint i; clipper->num_color_attribs = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 09b68c4559..21a9c3b77f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -159,7 +159,7 @@ static void flatshade_line_1( struct draw_stage *stage, static void flatshade_init_state( struct draw_stage *stage ) { struct flat_stage *flat = flat_stage(stage); - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; uint i; /* Find which vertex shader outputs are colors, make a list */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 3cbced362e..4673d5dcba 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -71,7 +71,7 @@ 
screen_interp( struct draw_context *draw, const struct vertex_header *v1 ) { uint attr; - for (attr = 0; attr < draw->num_vs_outputs; attr++) { + for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) { const float *val0 = v0->data[attr]; const float *val1 = v1->data[attr]; float *newv = dst->data[attr]; diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 50872fdbe9..3ac825f565 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -105,7 +105,7 @@ static void twoside_first_tri( struct draw_stage *stage, struct prim_header *header ) { struct twoside_stage *twoside = twoside_stage(stage); - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; uint i; twoside->attrib_front0 = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index ed08573382..df92e3f2d0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -197,7 +197,7 @@ static void widepoint_first_point( struct draw_stage *stage, if (draw->rasterizer->point_sprite) { /* find vertex shader texcoord outputs */ - const struct draw_vertex_shader *vs = draw->vertex_shader; + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; uint i, j = 0; for (i = 0; i < vs->info.num_outputs; i++) { if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { @@ -212,7 +212,7 @@ static void widepoint_first_point( struct draw_stage *stage, wide->psize_slot = -1; if (draw->rasterizer->point_size_per_vertex) { /* find PSIZ vertex output */ - const struct draw_vertex_shader *vs = draw->vertex_shader; + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; uint i; for (i = 0; i < vs->info.num_outputs; i++) { if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { diff 
--git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index fd51a57781..3418ee2b88 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -169,13 +169,24 @@ struct draw_context /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; + boolean identity_viewport; - struct draw_vertex_shader *vertex_shader; + struct { + struct draw_vertex_shader *vertex_shader; + uint num_vs_outputs; /**< convenience, from vertex_shader */ - boolean identity_viewport; - uint num_vs_outputs; /**< convenience, from vertex_shader */ + /** TGSI program interpreter runtime state */ + struct tgsi_exec_machine machine; + + /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. + */ + struct gallivm_cpu_engine *engine; + + struct translate_cache *fetch_cache; + struct translate_cache *emit_cache; + } vs; /* Clip derived state: */ @@ -192,16 +203,15 @@ struct draw_context unsigned reduced_prim; - /** TGSI program interpreter runtime state */ - struct tgsi_exec_machine machine; - - /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. 
- */ - struct gallivm_cpu_engine *engine; void *driver_private; }; +/******************************************************************************* + * Vertex shader code: + */ +boolean draw_vs_init( struct draw_context *draw ); +void draw_vs_destroy( struct draw_context *draw ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index dad54690a5..06718779a5 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -55,7 +55,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vs = draw->vertex_shader; + struct draw_vertex_shader *vs = draw->vs.vertex_shader; /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. 
@@ -107,7 +107,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vertex_shader; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; unsigned opt = fpme->opt; unsigned alloc_count = align_int( fetch_count, 4 ); @@ -183,7 +183,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vertex_shader; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; unsigned opt = fpme->opt; unsigned alloc_count = align_int( count, 4 ); diff --git a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c index cdb7d260da..643ea151c1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c +++ b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c @@ -368,8 +368,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle, { struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; - unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs; - unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs; + unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; + unsigned num_vs_outputs = draw->vs.vertex_shader->info.num_outputs; const struct vertex_info *vinfo; unsigned i; boolean need_psize = 0; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 03fe00a951..4142dd9589 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -66,13 +66,13 @@ draw_bind_vertex_shader(struct draw_context *draw, if (dvs) { - draw->vertex_shader = dvs; - draw->num_vs_outputs = dvs->info.num_outputs; + 
draw->vs.vertex_shader = dvs; + draw->vs.num_vs_outputs = dvs->info.num_outputs; dvs->prepare( dvs, draw ); } else { - draw->vertex_shader = NULL; - draw->num_vs_outputs = 0; + draw->vs.vertex_shader = NULL; + draw->vs.num_vs_outputs = 0; } } @@ -83,3 +83,30 @@ draw_delete_vertex_shader(struct draw_context *draw, { dvs->delete( dvs ); } + + + +boolean +draw_vs_init( struct draw_context *draw ) +{ + tgsi_exec_machine_init(&draw->vs.machine); + /* FIXME: give this machine thing a proper constructor: + */ + draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + + return TRUE; +} + +void +draw_vs_destroy( struct draw_context *draw ) +{ + if (draw->vs.machine.Inputs) + align_free(draw->vs.machine.Inputs); + + if (draw->vs.machine.Outputs) + align_free(draw->vs.machine.Outputs); + + tgsi_exec_machine_free_data(&draw->vs.machine); + +} diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 7a02f6334b..cb80d008cd 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -182,7 +182,7 @@ draw_create_vs_exec(struct draw_context *draw, vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; - vs->machine = &draw->machine; + vs->machine = &draw->vs.machine; return &vs->base; } diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index edf235cddc..13ad032bd3 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -158,7 +158,7 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; - vs->machine = &draw->machine; + vs->machine = &draw->vs.machine; x86_init_func( &vs->sse2_program ); -- cgit v1.2.3 
From 7c99d7fe60e7bb0b7cf103a851aeef4614278ca6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 15 May 2008 12:39:08 +0100 Subject: draw: create specialized vs varients incorporating fetch & emit --- src/gallium/auxiliary/draw/Makefile | 3 +- src/gallium/auxiliary/draw/draw_private.h | 2 + .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 338 +++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs.c | 83 ++++- src/gallium/auxiliary/draw/draw_vs.h | 105 +++++++ src/gallium/auxiliary/draw/draw_vs_exec.c | 2 + src/gallium/auxiliary/draw/draw_vs_llvm.c | 2 + src/gallium/auxiliary/draw/draw_vs_sse.c | 2 + src/gallium/auxiliary/draw/draw_vs_varient.c | 229 ++++++++++++++ 9 files changed, 764 insertions(+), 2 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c create mode 100644 src/gallium/auxiliary/draw/draw_vs_varient.c (limited to 'src/gallium/auxiliary/draw/draw_private.h') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 3053682da8..84877994fb 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -26,7 +26,7 @@ C_SOURCES = \ draw_pt_emit.c \ draw_pt_fetch.c \ draw_pt_fetch_emit.c \ - draw_pt_middle_fse.c \ + draw_pt_fetch_shade_emit.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_post_vs.c \ draw_pt_util.c \ @@ -34,6 +34,7 @@ C_SOURCES = \ draw_pt_vcache.c \ draw_vertex.c \ draw_vs.c \ + draw_vs_varient.c \ draw_vs_exec.c \ draw_vs_llvm.c \ draw_vs_sse.c diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 3418ee2b88..c095bf3d7b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -184,7 +184,9 @@ struct draw_context struct gallivm_cpu_engine *engine; + struct translate *fetch; struct translate_cache *fetch_cache; + struct translate *emit; struct translate_cache *emit_cache; } vs; diff --git 
a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c new file mode 100644 index 0000000000..74945dcfe9 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -0,0 +1,338 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" + +#include "translate/translate.h" + +struct fetch_shade_emit; + + +/* Prototype fetch, shade, emit-hw-verts all in one go. + */ +struct fetch_shade_emit { + struct draw_pt_middle_end base; + struct draw_context *draw; + + + /* Temporaries: + */ + const float *constants; + unsigned pitch[PIPE_MAX_ATTRIBS]; + const ubyte *src[PIPE_MAX_ATTRIBS]; + unsigned prim; + + struct draw_vs_varient_key key; + struct draw_vs_varient *active; +}; + + + + +static void fse_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; + unsigned num_vs_outputs = draw->vs.vertex_shader->info.num_outputs; + const struct vertex_info *vinfo; + unsigned i; + boolean need_psize = 0; + + + if (draw->pt.user.elts) { + assert(0); + return ; + } + + if (!draw->render->set_primitive( draw->render, + prim )) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + + + fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ + num_vs_inputs); /* inputs - fetch from api format */ + + fse->key.output_stride = vinfo->size * 4; + memset(fse->key.element, 0, + fse->key.nr_elements * sizeof(fse->key.element[0])); + + for (i = 0; i < num_vs_inputs; i++) { + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].in.format = src->src_format; + + /* Consider ignoring these, ie make generated programs + * independent of this state: + */ + 
fse->key.element[i].in.buffer = src->vertex_buffer_index; + fse->key.element[i].in.offset = src->src_offset; + } + + + { + unsigned dst_offset = 0; + + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned output_format = PIPE_FORMAT_NONE; + unsigned vs_output = vinfo->src_index[i]; + + switch (vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + need_psize = 1; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + vs_output = num_vs_outputs + 1; + + break; + default: + assert(0); + break; + } + + /* The elements in the key correspond to vertex shader output + * numbers, not to positions in the hw vertex description -- + * that's handled by the output_offset field. + */ + fse->key.element[vs_output].out.format = output_format; + fse->key.element[vs_output].out.offset = dst_offset; + + dst_offset += emit_sz; + assert(fse->key.output_stride >= dst_offset); + } + } + + /* To make psize work, really need to tell the vertex shader to + * copy that value from input->output. For 'translate' this was + * implicit for all elements. + */ +#if 0 + if (need_psize) { + unsigned input = num_vs_inputs + 1; + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; + fse->key.element[i].input_buffer = 0; //nr_buffers + 1; + fse->key.element[i].input_offset = 0; + + fse->key.nr_elements += 1; + + } +#endif + + /* Would normally look up a vertex shader and peruse its list of + * varients somehow. We omitted that step and put all the + * hardcoded "shaders" into an array. 
We're just making the + * assumption that this happens to be a matching shader... ie + * you're running isosurf, aren't you? + */ + fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, + &fse->key ); + + if (!fse->active) { + assert(0); + return ; + } + + /* Now set buffer pointers: + */ + for (i = 0; i < num_vs_inputs; i++) { + unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; + + fse->active->set_input( fse->active, + i, + + ((const ubyte *) draw->pt.user.vbuffer[buf] + + draw->pt.vertex_buffer[buf].buffer_offset), + + draw->pt.vertex_buffer[buf].pitch ); + } + + fse->active->set_constants( fse->active, + (const float (*)[4])draw->pt.user.constants ); + + //return TRUE; +} + + + + + + + +static void fse_run_linear( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned alloc_count = align(count, 4); + char *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)alloc_count ); + + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping and viewport transformation are done elsewhere -- + * either by the API or on hardware, or for some other reason not + * required... + */ + fse->active->run_linear( fse->active, + start, count, + hw_verts ); + + /* Draw arrays path to avoid re-emitting index list again and + * again. 
+ */ + draw->render->draw_arrays( draw->render, + 0, + count ); + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + count ); +} + + +static void +fse_run(struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned alloc_count = align(fetch_count, 4); + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)alloc_count ); + if (!hw_verts) { + assert(0); + return; + } + + + /* Single routine to fetch vertices, run shader and emit HW verts. + */ + fse->active->run_elts( fse->active, + fetch_elts, + fetch_count, + hw_verts ); + + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + fetch_count ); + +} + + +static void fse_finish( struct draw_pt_middle_end *middle ) +{ +} + + +static void +fse_destroy( struct draw_pt_middle_end *middle ) +{ + FREE(middle); +} + +struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ) +{ + struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); + if (!fse) + return NULL; + + fse->base.prepare = fse_prepare; + fse->base.run = fse_run; + fse->base.run_linear = fse_run_linear; + fse->base.finish = fse_finish; + fse->base.destroy = fse_destroy; + fse->draw = draw; + + return &fse->base; +} diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 4142dd9589..9b899d404e 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -36,6 +36,8 @@ #include "draw_private.h" #include "draw_context.h" #include "draw_vs.h" 
+#include "translate/translate.h" +#include "translate/translate_cache.h" @@ -90,11 +92,25 @@ boolean draw_vs_init( struct draw_context *draw ) { tgsi_exec_machine_init(&draw->vs.machine); + /* FIXME: give this machine thing a proper constructor: */ draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); - draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + if (!draw->vs.machine.Inputs) + return FALSE; + draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + if (!draw->vs.machine.Outputs) + return FALSE; + + draw->vs.emit_cache = translate_cache_create(); + if (!draw->vs.emit_cache) + return FALSE; + + draw->vs.fetch_cache = translate_cache_create(); + if (!draw->vs.fetch_cache) + return FALSE; + return TRUE; } @@ -107,6 +123,71 @@ draw_vs_destroy( struct draw_context *draw ) if (draw->vs.machine.Outputs) align_free(draw->vs.machine.Outputs); + if (draw->vs.fetch_cache) + translate_cache_destroy(draw->vs.fetch_cache); + + if (draw->vs.emit_cache) + translate_cache_destroy(draw->vs.emit_cache); + tgsi_exec_machine_free_data(&draw->vs.machine); } + + +struct draw_vs_varient * +draw_vs_lookup_varient( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) +{ + struct draw_vs_varient *varient; + unsigned i; + + /* Lookup existing varient: + */ + for (i = 0; i < vs->nr_varients; i++) + if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0) + return vs->varient[i]; + + /* Else have to create a new one: + */ + varient = vs->create_varient( vs, key ); + if (varient == NULL) + return NULL; + + /* Add it to our list: + */ + assert(vs->nr_varients < Elements(vs->varient)); + vs->varient[vs->nr_varients++] = varient; + + /* Done + */ + return varient; +} + + +struct translate * +draw_vs_get_fetch( struct draw_context *draw, + struct translate_key *key ) +{ + if (!draw->vs.fetch || + 
translate_key_compare(&draw->vs.fetch->key, key) != 0) + { + translate_key_sanitize(key); + draw->vs.fetch = translate_cache_find(draw->vs.fetch_cache, key); + } + + return draw->vs.fetch; +} + +struct translate * +draw_vs_get_emit( struct draw_context *draw, + struct translate_key *key ) +{ + if (!draw->vs.emit || + translate_key_compare(&draw->vs.emit->key, key) != 0) + { + translate_key_sanitize(key); + draw->vs.emit = translate_cache_find(draw->vs.emit_cache, key); + } + + return draw->vs.emit; +} diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index f9772b83b8..677be0d28d 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -38,10 +38,63 @@ struct draw_context; struct pipe_shader_state; +struct draw_vs_input +{ + enum pipe_format format; + unsigned buffer; + unsigned offset; +}; + +struct draw_vs_output +{ + enum pipe_format format; + unsigned offset; +}; + +struct draw_vs_element { + struct draw_vs_input in; + struct draw_vs_output out; +}; + +struct draw_vs_varient_key { + unsigned output_stride; + unsigned nr_elements; + struct draw_vs_element element[PIPE_MAX_ATTRIBS]; +}; + +struct draw_vs_varient { + struct draw_vs_varient_key key; + + struct draw_vertex_shader *vs; + + void (*set_input)( struct draw_vs_varient *, + unsigned i, + const void *ptr, + unsigned stride ); + + void (*set_constants)( struct draw_vs_varient *, + const float (*constants)[4] ); + + + void (*run_linear)( struct draw_vs_varient *shader, + unsigned start, + unsigned count, + void *output_buffer ); + + void (*run_elts)( struct draw_vs_varient *shader, + const unsigned *elts, + unsigned count, + void *output_buffer ); + + void (*destroy)( struct draw_vs_varient * ); +}; + + /** * Private version of the compiled vertex_shader */ struct draw_vertex_shader { + struct draw_context *draw; /* This member will disappear shortly: */ @@ -49,6 +102,14 @@ struct draw_vertex_shader { struct tgsi_shader_info info; + 
/* + */ + struct draw_vs_varient *varient[16]; + unsigned nr_varients; + struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader, + const struct draw_vs_varient_key *key ); + + void (*prepare)( struct draw_vertex_shader *shader, struct draw_context *draw ); @@ -68,6 +129,15 @@ struct draw_vertex_shader { }; +struct draw_vs_varient * +draw_vs_lookup_varient( struct draw_vertex_shader *base, + const struct draw_vs_varient_key *key ); + + +/******************************************************************************** + * Internal functions: + */ + struct draw_vertex_shader * draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *templ); @@ -80,8 +150,43 @@ struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); +/******************************************************************************** + * Helpers for vs implementations that don't do their own fetch/emit varients. + * Means these can be shared between shaders. 
+ */ +struct translate; +struct translate_key; + +struct translate *draw_vs_get_fetch( struct draw_context *draw, + struct translate_key *key ); + + +struct translate *draw_vs_get_emit( struct draw_context *draw, + struct translate_key *key ); + +struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); + + + +static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key ) +{ + return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_vs_element); +} + +static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a, + const struct draw_vs_varient_key *b ) +{ + int keysize = draw_vs_varient_keysize(a); + return memcmp(a, b, keysize); +} + + + + #define MAX_TGSI_VERTICES 4 + #endif diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index cb80d008cd..4501877efc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -179,9 +179,11 @@ draw_create_vs_exec(struct draw_context *draw, tgsi_scan_shader(state->tokens, &vs->base.info); + vs->base.draw = draw; vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; + vs->base.create_varient = draw_vs_varient_generic; vs->machine = &draw->vs.machine; return &vs->base; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 171da51dd5..621472ec7c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -114,7 +114,9 @@ draw_create_vs_llvm(struct draw_context *draw, tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); + vs->base.draw = draw; vs->base.prepare = vs_llvm_prepare; + vs->base.create_varient = draw_vs_varient_generic; vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; vs->machine = &draw->machine; diff --git 
a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 13ad032bd3..df94a7e0c7 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -155,6 +155,8 @@ draw_create_vs_sse(struct draw_context *draw, tgsi_scan_shader(templ->tokens, &vs->base.info); + vs->base.draw = draw; + vs->base.create_varient = draw_vs_varient_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c new file mode 100644 index 0000000000..d27b0f6187 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -0,0 +1,229 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_vs.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + +/* A first pass at incorporating vertex fetch/emit functionality into + */ +struct draw_vs_varient_generic { + struct draw_vs_varient base; + + + + struct draw_vertex_shader *shader; + struct draw_context *draw; + + /* Basic plan is to run these two translate functions before/after + * the vertex shader's existing run_linear() routine to simulate + * the inclusion of this functionality into the shader... + * + * Next will look at actually including it. 
+ */ + struct translate *fetch; + struct translate *emit; + + const float (*constants)[4]; +}; + + + + +static void vsvg_set_constants( struct draw_vs_varient *varient, + const float (*constants)[4] ) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + vsvg->constants = constants; +} + + +static void vsvg_set_input( struct draw_vs_varient *varient, + unsigned buffer, + const void *ptr, + unsigned stride ) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + vsvg->fetch->set_buffer(vsvg->fetch, + buffer, + ptr, + stride); +} + + +static void vsvg_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + /* Want to do this in small batches for cache locality? + */ + + vsvg->fetch->run_elts( vsvg->fetch, + elts, + count, + output_buffer ); + + //if (!vsvg->base.vs->is_passthrough) + { + vsvg->base.vs->run_linear( vsvg->base.vs, + output_buffer, + output_buffer, + vsvg->constants, + count, + vsvg->base.key.output_stride, + vsvg->base.key.output_stride); + + //if (!vsvg->already_in_emit_format) + + vsvg->emit->set_buffer( vsvg->emit, + 0, + output_buffer, + vsvg->base.key.output_stride ); + + + vsvg->emit->run( vsvg->emit, + 0, count, + output_buffer ); + } +} + + +static void vsvg_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + //debug_printf("%s %d %d\n", __FUNCTION__, start, count); + + + vsvg->fetch->run( vsvg->fetch, + start, + count, + output_buffer ); + + //if (!vsvg->base.vs->is_passthrough) + { + vsvg->base.vs->run_linear( vsvg->base.vs, + output_buffer, + output_buffer, + vsvg->constants, + count, + vsvg->base.key.output_stride, + vsvg->base.key.output_stride); + + //if (!vsvg->already_in_emit_format) 
+ vsvg->emit->set_buffer( vsvg->emit, + 0, + output_buffer, + vsvg->base.key.output_stride ); + + + vsvg->emit->run( vsvg->emit, + 0, count, + output_buffer ); + } +} + + + +static void vsvg_destroy( struct draw_vs_varient *varient ) +{ + FREE(varient); +} + + +struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) +{ + unsigned i; + struct translate_key fetch, emit; + + struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); + if (vsvg == NULL) + return NULL; + + vsvg->base.key = *key; + vsvg->base.vs = vs; + vsvg->base.set_input = vsvg_set_input; + vsvg->base.set_constants = vsvg_set_constants; + vsvg->base.run_elts = vsvg_run_elts; + vsvg->base.run_linear = vsvg_run_linear; + vsvg->base.destroy = vsvg_destroy; + + + + /* OK, have to build a new one: + */ + fetch.nr_elements = vs->info.num_inputs; + fetch.output_stride = 0; + for (i = 0; i < vs->info.num_inputs; i++) { + fetch.element[i].input_format = key->element[i].in.format; + fetch.element[i].input_buffer = key->element[i].in.buffer; + fetch.element[i].input_offset = key->element[i].in.offset; + fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fetch.element[i].output_offset = fetch.output_stride; + fetch.output_stride += 4 * sizeof(float); + } + + + emit.nr_elements = vs->info.num_outputs; + emit.output_stride = key->output_stride; + for (i = 0; i < vs->info.num_outputs; i++) { + emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit.element[i].input_buffer = 0; + emit.element[i].input_offset = i * 4 * sizeof(float); + emit.element[i].output_format = key->element[i].out.format; + emit.element[i].output_offset = key->element[i].out.offset; + } + + vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch ); + vsvg->emit = draw_vs_get_emit( vs->draw, &emit ); + + return &vsvg->base; +} + + + + + -- cgit v1.2.3