summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/draw
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/draw')
-rw-r--r--src/gallium/auxiliary/draw/Makefile46
-rw-r--r--src/gallium/auxiliary/draw/SConscript46
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c430
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h182
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c319
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.h126
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c921
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c884
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c516
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_cull.c148
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_flatshade.c282
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_offset.c185
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c777
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_stipple.c251
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_twoside.c200
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_unfilled.c204
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_util.c137
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_validate.c339
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c492
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_line.c181
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_point.c333
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h319
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c324
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h233
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_decompose.h182
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_elts.c89
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c313
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c234
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c437
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c429
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c395
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c233
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_util.c77
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray.c193
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp.h238
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h91
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c516
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h214
-rw-r--r--src/gallium/auxiliary/draw/draw_vbuf.h129
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.c129
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h162
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c254
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h224
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c2274
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.h253
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos_io.c462
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos_machine.c324
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c208
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c161
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c245
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c217
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c322
52 files changed, 16880 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
new file mode 100644
index 0000000000..5041dcc072
--- /dev/null
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -0,0 +1,46 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = draw
+
+C_SOURCES = \
+ draw_context.c \
+ draw_pipe.c \
+ draw_pipe_aaline.c \
+ draw_pipe_aapoint.c \
+ draw_pipe_clip.c \
+ draw_pipe_cull.c \
+ draw_pipe_flatshade.c \
+ draw_pipe_offset.c \
+ draw_pipe_pstipple.c \
+ draw_pipe_stipple.c \
+ draw_pipe_twoside.c \
+ draw_pipe_unfilled.c \
+ draw_pipe_util.c \
+ draw_pipe_validate.c \
+ draw_pipe_vbuf.c \
+ draw_pipe_wide_line.c \
+ draw_pipe_wide_point.c \
+ draw_pt.c \
+ draw_pt_elts.c \
+ draw_pt_emit.c \
+ draw_pt_fetch.c \
+ draw_pt_fetch_emit.c \
+ draw_pt_fetch_shade_emit.c \
+ draw_pt_fetch_shade_pipeline.c \
+ draw_pt_post_vs.c \
+ draw_pt_util.c \
+ draw_pt_varray.c \
+ draw_pt_vcache.c \
+ draw_vertex.c \
+ draw_vs.c \
+ draw_vs_varient.c \
+ draw_vs_aos.c \
+ draw_vs_aos_io.c \
+ draw_vs_aos_machine.c \
+ draw_vs_exec.c \
+ draw_vs_llvm.c \
+ draw_vs_ppc.c \
+ draw_vs_sse.c
+
+include ../../Makefile.template
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
new file mode 100644
index 0000000000..5f05aa324a
--- /dev/null
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -0,0 +1,46 @@
+Import('*')
+
+draw = env.ConvenienceLibrary(
+ target = 'draw',
+ source = [
+ 'draw_context.c',
+ 'draw_pipe.c',
+ 'draw_pipe_aaline.c',
+ 'draw_pipe_aapoint.c',
+ 'draw_pipe_clip.c',
+ 'draw_pipe_cull.c',
+ 'draw_pipe_flatshade.c',
+ 'draw_pipe_offset.c',
+ 'draw_pipe_pstipple.c',
+ 'draw_pipe_stipple.c',
+ 'draw_pipe_twoside.c',
+ 'draw_pipe_unfilled.c',
+ 'draw_pipe_util.c',
+ 'draw_pipe_validate.c',
+ 'draw_pipe_vbuf.c',
+ 'draw_pipe_wide_line.c',
+ 'draw_pipe_wide_point.c',
+ 'draw_pt.c',
+ 'draw_pt_elts.c',
+ 'draw_pt_emit.c',
+ 'draw_pt_fetch.c',
+ 'draw_pt_fetch_emit.c',
+ 'draw_pt_fetch_shade_emit.c',
+ 'draw_pt_fetch_shade_pipeline.c',
+ 'draw_pt_post_vs.c',
+ 'draw_pt_util.c',
+ 'draw_pt_varray.c',
+ 'draw_pt_vcache.c',
+ 'draw_vertex.c',
+ 'draw_vs.c',
+ 'draw_vs_aos.c',
+ 'draw_vs_aos_io.c',
+ 'draw_vs_aos_machine.c',
+ 'draw_vs_exec.c',
+ 'draw_vs_llvm.c',
+ 'draw_vs_ppc.c',
+ 'draw_vs_sse.c',
+ 'draw_vs_varient.c'
+ ])
+
+auxiliaries.insert(0, draw)
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
new file mode 100644
index 0000000000..a4f1fcddc1
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -0,0 +1,430 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "draw_context.h"
+#include "draw_vbuf.h"
+#include "draw_vs.h"
+#include "draw_pt.h"
+#include "draw_pipe.h"
+
+
+struct draw_context *draw_create( void )
+{
+ struct draw_context *draw = CALLOC_STRUCT( draw_context );
+ if (draw == NULL)
+ goto fail;
+
+ ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 );
+ ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 );
+ ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 );
+ ASSIGN_4V( draw->plane[3], 0, 1, 0, 1 );
+ ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */
+ ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */
+ draw->nr_planes = 6;
+
+
+ draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
+
+
+ if (!draw_pipeline_init( draw ))
+ goto fail;
+
+ if (!draw_pt_init( draw ))
+ goto fail;
+
+ if (!draw_vs_init( draw ))
+ goto fail;
+
+ return draw;
+
+fail:
+ draw_destroy( draw );
+ return NULL;
+}
+
+
+void draw_destroy( struct draw_context *draw )
+{
+ if (!draw)
+ return;
+
+
+
+ /* Not so fast -- we're just borrowing this at the moment.
+ *
+ if (draw->render)
+ draw->render->destroy( draw->render );
+ */
+
+ draw_pipeline_destroy( draw );
+ draw_pt_destroy( draw );
+ draw_vs_destroy( draw );
+
+ FREE( draw );
+}
+
+
+
+void draw_flush( struct draw_context *draw )
+{
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+}
+
+
+/**
+ * Specify the Minimum Resolvable Depth factor for polygon offset.
+ * This factor potentially depends on the number of Z buffer bits,
+ * the rasterization algorithm and the arithmetic performed on Z
+ * values between vertex shading and rasterization. It will vary
+ * from one driver to another.
+ */
+void draw_set_mrd(struct draw_context *draw, double mrd)
+{
+ draw->mrd = mrd;
+}
+
+
+/**
+ * Register new primitive rasterization/rendering state.
+ * This causes the drawing pipeline to be rebuilt.
+ */
+void draw_set_rasterizer_state( struct draw_context *draw,
+ const struct pipe_rasterizer_state *raster )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ draw->rasterizer = raster;
+ draw->bypass_clipping =
+ ((draw->rasterizer && draw->rasterizer->bypass_vs_clip_and_viewport) ||
+ draw->driver.bypass_clipping);
+}
+
+
+void draw_set_driver_clipping( struct draw_context *draw,
+ boolean bypass_clipping )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ draw->driver.bypass_clipping = bypass_clipping;
+ draw->bypass_clipping =
+ ((draw->rasterizer && draw->rasterizer->bypass_vs_clip_and_viewport) ||
+ draw->driver.bypass_clipping);
+}
+
+
+/**
+ * Plug in the primitive rendering/rasterization stage (which is the last
+ * stage in the drawing pipeline).
+ * This is provided by the device driver.
+ */
+void draw_set_rasterize_stage( struct draw_context *draw,
+ struct draw_stage *stage )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ draw->pipeline.rasterize = stage;
+}
+
+
+/**
+ * Set the draw module's clipping state.
+ */
+void draw_set_clip_state( struct draw_context *draw,
+ const struct pipe_clip_state *clip )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ assert(clip->nr <= PIPE_MAX_CLIP_PLANES);
+ memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
+ draw->nr_planes = 6 + clip->nr;
+}
+
+
+/**
+ * Set the draw module's viewport state.
+ */
+void draw_set_viewport_state( struct draw_context *draw,
+ const struct pipe_viewport_state *viewport )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->viewport = *viewport; /* struct copy */
+ draw->identity_viewport = (viewport->scale[0] == 1.0f &&
+ viewport->scale[1] == 1.0f &&
+ viewport->scale[2] == 1.0f &&
+ viewport->scale[3] == 1.0f &&
+ viewport->translate[0] == 0.0f &&
+ viewport->translate[1] == 0.0f &&
+ viewport->translate[2] == 0.0f &&
+ viewport->translate[3] == 0.0f);
+
+ draw_vs_set_viewport( draw, viewport );
+}
+
+
+
+void
+draw_set_vertex_buffers(struct draw_context *draw,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ assert(count <= PIPE_MAX_ATTRIBS);
+
+ memcpy(draw->pt.vertex_buffer, buffers, count * sizeof(buffers[0]));
+ draw->pt.nr_vertex_buffers = count;
+}
+
+
+void
+draw_set_vertex_elements(struct draw_context *draw,
+ unsigned count,
+ const struct pipe_vertex_element *elements)
+{
+ assert(count <= PIPE_MAX_ATTRIBS);
+
+ memcpy(draw->pt.vertex_element, elements, count * sizeof(elements[0]));
+ draw->pt.nr_vertex_elements = count;
+}
+
+
+/**
+ * Tell drawing context where to find mapped vertex buffers.
+ */
+void
+draw_set_mapped_vertex_buffer(struct draw_context *draw,
+ unsigned attr, const void *buffer)
+{
+ draw->pt.user.vbuffer[attr] = buffer;
+}
+
+
+void
+draw_set_mapped_constant_buffer(struct draw_context *draw,
+ const void *buffer,
+ unsigned size )
+{
+ draw->pt.user.constants = buffer;
+ draw_vs_set_constants( draw, (const float (*)[4])buffer, size );
+}
+
+
+/**
+ * Tells the draw module to draw points with triangles if their size
+ * is greater than this threshold.
+ */
+void
+draw_wide_point_threshold(struct draw_context *draw, float threshold)
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->pipeline.wide_point_threshold = threshold;
+}
+
+
+/**
+ * Tells the draw module to draw lines with triangles if their width
+ * is greater than this threshold.
+ */
+void
+draw_wide_line_threshold(struct draw_context *draw, float threshold)
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->pipeline.wide_line_threshold = threshold;
+}
+
+
+/**
+ * Tells the draw module whether or not to implement line stipple.
+ */
+void
+draw_enable_line_stipple(struct draw_context *draw, boolean enable)
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->pipeline.line_stipple = enable;
+}
+
+
+/**
+ * Tells draw module whether to convert points to quads for sprite mode.
+ */
+void
+draw_enable_point_sprites(struct draw_context *draw, boolean enable)
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->pipeline.point_sprite = enable;
+}
+
+
+void
+draw_set_force_passthrough( struct draw_context *draw, boolean enable )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->force_passthrough = enable;
+}
+
+
+/**
+ * Ask the draw module for the location/slot of the given vertex attribute in
+ * a post-transformed vertex.
+ *
+ * With this function, drivers that use the draw module should have no reason
+ * to track the current vertex shader.
+ *
+ * Note that the draw module may sometimes generate vertices with extra
+ * attributes (such as texcoords for AA lines). The driver can call this
+ * function to find those attributes.
+ *
+ * Zero is returned if the attribute is not found since this is
+ * a don't care / undefined situtation. Returning -1 would be a bit more
+ * work for the drivers.
+ */
+int
+draw_find_vs_output(const struct draw_context *draw,
+ uint semantic_name, uint semantic_index)
+{
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ uint i;
+ for (i = 0; i < vs->info.num_outputs; i++) {
+ if (vs->info.output_semantic_name[i] == semantic_name &&
+ vs->info.output_semantic_index[i] == semantic_index)
+ return i;
+ }
+
+ /* XXX there may be more than one extra vertex attrib.
+ * For example, simulated gl_FragCoord and gl_PointCoord.
+ */
+ if (draw->extra_vp_outputs.semantic_name == semantic_name &&
+ draw->extra_vp_outputs.semantic_index == semantic_index) {
+ return draw->extra_vp_outputs.slot;
+ }
+ return 0;
+}
+
+
+/**
+ * Return number of vertex shader outputs.
+ */
+uint
+draw_num_vs_outputs(const struct draw_context *draw)
+{
+ uint count = draw->vs.vertex_shader->info.num_outputs;
+ if (draw->extra_vp_outputs.slot > 0)
+ count++;
+ return count;
+}
+
+
+/**
+ * Provide TGSI sampler objects for vertex shaders that use texture fetches.
+ * This might only be used by software drivers for the time being.
+ */
+void
+draw_texture_samplers(struct draw_context *draw,
+ uint num_samplers,
+ struct tgsi_sampler **samplers)
+{
+ draw->vs.num_samplers = num_samplers;
+ draw->vs.samplers = samplers;
+}
+
+
+
+
+void draw_set_render( struct draw_context *draw,
+ struct vbuf_render *render )
+{
+ draw->render = render;
+}
+
+void draw_set_edgeflags( struct draw_context *draw,
+ const unsigned *edgeflag )
+{
+ draw->pt.user.edgeflag = edgeflag;
+}
+
+
+
+
+/**
+ * Tell the drawing context about the index/element buffer to use
+ * (ala glDrawElements)
+ * If no element buffer is to be used (i.e. glDrawArrays) then this
+ * should be called with eltSize=0 and elements=NULL.
+ *
+ * \param draw the drawing context
+ * \param eltSize size of each element (1, 2 or 4 bytes)
+ * \param elements the element buffer ptr
+ */
+void
+draw_set_mapped_element_buffer_range( struct draw_context *draw,
+ unsigned eltSize,
+ unsigned min_index,
+ unsigned max_index,
+ const void *elements )
+{
+ draw->pt.user.elts = elements;
+ draw->pt.user.eltSize = eltSize;
+ draw->pt.user.min_index = min_index;
+ draw->pt.user.max_index = max_index;
+}
+
+
+void
+draw_set_mapped_element_buffer( struct draw_context *draw,
+ unsigned eltSize,
+ const void *elements )
+{
+ draw->pt.user.elts = elements;
+ draw->pt.user.eltSize = eltSize;
+ draw->pt.user.min_index = 0;
+ draw->pt.user.max_index = 0xffffffff;
+}
+
+
+/* Revamp me please:
+ */
+void draw_do_flush( struct draw_context *draw, unsigned flags )
+{
+ if (!draw->suspend_flushing)
+ {
+ assert(!draw->flushing); /* catch inadvertant recursion */
+
+ draw->flushing = TRUE;
+
+ draw_pipeline_flush( draw, flags );
+
+ draw->reduced_prim = ~0; /* is reduced_prim needed any more? */
+
+ draw->flushing = FALSE;
+ }
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
new file mode 100644
index 0000000000..d529e4e9a2
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -0,0 +1,182 @@
+
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Public interface into the drawing module.
+ */
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef DRAW_CONTEXT_H
+#define DRAW_CONTEXT_H
+
+
+#include "pipe/p_state.h"
+
+
+struct pipe_context;
+struct draw_context;
+struct draw_stage;
+struct draw_vertex_shader;
+struct tgsi_sampler;
+
+
+struct draw_context *draw_create( void );
+
+void draw_destroy( struct draw_context *draw );
+
+void draw_set_viewport_state( struct draw_context *draw,
+ const struct pipe_viewport_state *viewport );
+
+void draw_set_clip_state( struct draw_context *pipe,
+ const struct pipe_clip_state *clip );
+
+void draw_set_rasterizer_state( struct draw_context *draw,
+ const struct pipe_rasterizer_state *raster );
+
+void draw_set_rasterize_stage( struct draw_context *draw,
+ struct draw_stage *stage );
+
+void draw_wide_point_threshold(struct draw_context *draw, float threshold);
+
+void draw_wide_line_threshold(struct draw_context *draw, float threshold);
+
+void draw_enable_line_stipple(struct draw_context *draw, boolean enable);
+
+void draw_enable_point_sprites(struct draw_context *draw, boolean enable);
+
+void draw_set_mrd(struct draw_context *draw, double mrd);
+
+boolean
+draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe);
+
+boolean
+draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe);
+
+boolean
+draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe);
+
+
+int
+draw_find_vs_output(const struct draw_context *draw,
+ uint semantic_name, uint semantic_index);
+
+uint
+draw_num_vs_outputs(const struct draw_context *draw);
+
+
+void
+draw_texture_samplers(struct draw_context *draw,
+ uint num_samplers,
+ struct tgsi_sampler **samplers);
+
+
+
+/*
+ * Vertex shader functions
+ */
+
+struct draw_vertex_shader *
+draw_create_vertex_shader(struct draw_context *draw,
+ const struct pipe_shader_state *shader);
+void draw_bind_vertex_shader(struct draw_context *draw,
+ struct draw_vertex_shader *dvs);
+void draw_delete_vertex_shader(struct draw_context *draw,
+ struct draw_vertex_shader *dvs);
+
+
+
+/*
+ * Vertex data functions
+ */
+
+void draw_set_vertex_buffers(struct draw_context *draw,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers);
+
+void draw_set_vertex_elements(struct draw_context *draw,
+ unsigned count,
+ const struct pipe_vertex_element *elements);
+
+void
+draw_set_mapped_element_buffer_range( struct draw_context *draw,
+ unsigned eltSize,
+ unsigned min_index,
+ unsigned max_index,
+ const void *elements );
+
+void draw_set_mapped_element_buffer( struct draw_context *draw,
+ unsigned eltSize,
+ const void *elements );
+
+void draw_set_mapped_vertex_buffer(struct draw_context *draw,
+ unsigned attr, const void *buffer);
+
+void draw_set_mapped_constant_buffer(struct draw_context *draw,
+ const void *buffer,
+ unsigned size );
+
+void draw_set_edgeflags( struct draw_context *draw,
+ const unsigned *edgeflag );
+
+
+/***********************************************************************
+ * draw_prim.c
+ */
+
+void draw_arrays(struct draw_context *draw, unsigned prim,
+ unsigned start, unsigned count);
+
+void draw_flush(struct draw_context *draw);
+
+
+/*******************************************************************************
+ * Driver backend interface
+ */
+struct vbuf_render;
+void draw_set_render( struct draw_context *draw,
+ struct vbuf_render *render );
+
+void draw_set_driver_clipping( struct draw_context *draw,
+ boolean bypass_clipping );
+
+void draw_set_force_passthrough( struct draw_context *draw,
+ boolean enable );
+
+/*******************************************************************************
+ * Draw pipeline
+ */
+boolean draw_need_pipeline(const struct draw_context *draw,
+ const struct pipe_rasterizer_state *rasterizer,
+ unsigned prim );
+
+
+
+#endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
new file mode 100644
index 0000000000..1c6d657297
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -0,0 +1,319 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "draw/draw_private.h"
+#include "draw/draw_pipe.h"
+
+
+
+boolean draw_pipeline_init( struct draw_context *draw )
+{
+ /* create pipeline stages */
+ draw->pipeline.wide_line = draw_wide_line_stage( draw );
+ draw->pipeline.wide_point = draw_wide_point_stage( draw );
+ draw->pipeline.stipple = draw_stipple_stage( draw );
+ draw->pipeline.unfilled = draw_unfilled_stage( draw );
+ draw->pipeline.twoside = draw_twoside_stage( draw );
+ draw->pipeline.offset = draw_offset_stage( draw );
+ draw->pipeline.clip = draw_clip_stage( draw );
+ draw->pipeline.flatshade = draw_flatshade_stage( draw );
+ draw->pipeline.cull = draw_cull_stage( draw );
+ draw->pipeline.validate = draw_validate_stage( draw );
+ draw->pipeline.first = draw->pipeline.validate;
+
+ if (!draw->pipeline.wide_line ||
+ !draw->pipeline.wide_point ||
+ !draw->pipeline.stipple ||
+ !draw->pipeline.unfilled ||
+ !draw->pipeline.twoside ||
+ !draw->pipeline.offset ||
+ !draw->pipeline.clip ||
+ !draw->pipeline.flatshade ||
+ !draw->pipeline.cull ||
+ !draw->pipeline.validate)
+ return FALSE;
+
+ /* these defaults are oriented toward the needs of softpipe */
+ draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */
+ draw->pipeline.wide_line_threshold = 1.0;
+ draw->pipeline.line_stipple = TRUE;
+ draw->pipeline.point_sprite = TRUE;
+
+ return TRUE;
+}
+
+
+void draw_pipeline_destroy( struct draw_context *draw )
+{
+ if (draw->pipeline.wide_line)
+ draw->pipeline.wide_line->destroy( draw->pipeline.wide_line );
+ if (draw->pipeline.wide_point)
+ draw->pipeline.wide_point->destroy( draw->pipeline.wide_point );
+ if (draw->pipeline.stipple)
+ draw->pipeline.stipple->destroy( draw->pipeline.stipple );
+ if (draw->pipeline.unfilled)
+ draw->pipeline.unfilled->destroy( draw->pipeline.unfilled );
+ if (draw->pipeline.twoside)
+ draw->pipeline.twoside->destroy( draw->pipeline.twoside );
+ if (draw->pipeline.offset)
+ draw->pipeline.offset->destroy( draw->pipeline.offset );
+ if (draw->pipeline.clip)
+ draw->pipeline.clip->destroy( draw->pipeline.clip );
+ if (draw->pipeline.flatshade)
+ draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
+ if (draw->pipeline.cull)
+ draw->pipeline.cull->destroy( draw->pipeline.cull );
+ if (draw->pipeline.validate)
+ draw->pipeline.validate->destroy( draw->pipeline.validate );
+ if (draw->pipeline.aaline)
+ draw->pipeline.aaline->destroy( draw->pipeline.aaline );
+ if (draw->pipeline.aapoint)
+ draw->pipeline.aapoint->destroy( draw->pipeline.aapoint );
+ if (draw->pipeline.pstipple)
+ draw->pipeline.pstipple->destroy( draw->pipeline.pstipple );
+ if (draw->pipeline.rasterize)
+ draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
+}
+
+
+
+
+
+
+
+static void do_point( struct draw_context *draw,
+ const char *v0 )
+{
+ struct prim_header prim;
+
+ prim.flags = 0;
+ prim.pad = 0;
+ prim.v[0] = (struct vertex_header *)v0;
+
+ draw->pipeline.first->point( draw->pipeline.first, &prim );
+}
+
+
+static void do_line( struct draw_context *draw,
+ ushort flags,
+ const char *v0,
+ const char *v1 )
+{
+ struct prim_header prim;
+
+ prim.flags = flags;
+ prim.pad = 0;
+ prim.v[0] = (struct vertex_header *)v0;
+ prim.v[1] = (struct vertex_header *)v1;
+
+ draw->pipeline.first->line( draw->pipeline.first, &prim );
+}
+
+
+static void do_triangle( struct draw_context *draw,
+ ushort flags,
+ char *v0,
+ char *v1,
+ char *v2 )
+{
+ struct prim_header prim;
+
+ prim.v[0] = (struct vertex_header *)v0;
+ prim.v[1] = (struct vertex_header *)v1;
+ prim.v[2] = (struct vertex_header *)v2;
+ prim.flags = flags;
+ prim.pad = 0;
+
+ draw->pipeline.first->tri( draw->pipeline.first, &prim );
+}
+
+
+
+#define QUAD(i0,i1,i2,i3) \
+ do_triangle( draw, \
+ ( DRAW_PIPE_RESET_STIPPLE | \
+ DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_2 ), \
+ verts + stride * elts[i0], \
+ verts + stride * elts[i1], \
+ verts + stride * elts[i3]); \
+ do_triangle( draw, \
+ ( DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_1 ), \
+ verts + stride * elts[i1], \
+ verts + stride * elts[i2], \
+ verts + stride * elts[i3])
+
+#define TRIANGLE(flags,i0,i1,i2) \
+ do_triangle( draw, \
+ elts[i0], /* flags */ \
+ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * elts[i1], \
+ verts + stride * elts[i2])
+
+#define LINE(flags,i0,i1) \
+ do_line( draw, \
+ elts[i0], \
+ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * elts[i1])
+
+#define POINT(i0) \
+ do_point( draw, \
+ verts + stride * elts[i0] )
+
+#define FUNC pipe_run
+#define ARGS \
+ struct draw_context *draw, \
+ unsigned prim, \
+ struct vertex_header *vertices, \
+ unsigned stride, \
+ const ushort *elts
+
+#define LOCAL_VARS \
+ char *verts = (char *)vertices; \
+ boolean flatfirst = (draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first); \
+ unsigned i; \
+ ushort flags
+
+#define FLUSH
+
+#include "draw_pt_decompose.h"
+#undef ARGS
+#undef LOCAL_VARS
+
+
+
+/* Code to run the pipeline on a fairly arbitary collection of vertices.
+ *
+ * Vertex headers must be pre-initialized with the
+ * UNDEFINED_VERTEX_ID, this code will cause that id to become
+ * overwritten, so it may have to be reset if there is the intention
+ * to reuse the vertices.
+ *
+ * This code provides a callback to reset the vertex id's which the
+ * draw_vbuf.c code uses when it has to perform a flush.
+ */
+void draw_pipeline_run( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
+ unsigned count )
+{
+ char *verts = (char *)vertices;
+
+ draw->pipeline.verts = verts;
+ draw->pipeline.vertex_stride = stride;
+ draw->pipeline.vertex_count = vertex_count;
+
+ pipe_run(draw, prim, vertices, stride, elts, count);
+
+ draw->pipeline.verts = NULL;
+ draw->pipeline.vertex_count = 0;
+}
+
+#define QUAD(i0,i1,i2,i3) \
+ do_triangle( draw, \
+ ( DRAW_PIPE_RESET_STIPPLE | \
+ DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_2 ), \
+ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i1), \
+ verts + stride * (i3)); \
+ do_triangle( draw, \
+ ( DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_1 ), \
+ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i2), \
+ verts + stride * (i3))
+
+#define TRIANGLE(flags,i0,i1,i2) \
+ do_triangle( draw, \
+ flags, /* flags */ \
+ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i1), \
+ verts + stride * (i2))
+
+#define LINE(flags,i0,i1) \
+ do_line( draw, \
+ flags, \
+ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i1))
+
+#define POINT(i0) \
+ do_point( draw, \
+ verts + stride * i0 )
+
+#define FUNC pipe_run_linear
+#define ARGS \
+ struct draw_context *draw, \
+ unsigned prim, \
+ struct vertex_header *vertices, \
+ unsigned stride
+
+#define LOCAL_VARS \
+ char *verts = (char *)vertices; \
+ boolean flatfirst = (draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first); \
+ unsigned i; \
+ ushort flags
+
+#define FLUSH
+
+#include "draw_pt_decompose.h"
+
+void draw_pipeline_run_linear( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ char *verts = (char *)vertices;
+ draw->pipeline.verts = verts;
+ draw->pipeline.vertex_stride = stride;
+ draw->pipeline.vertex_count = count;
+
+ pipe_run_linear(draw, prim, vertices, stride, count);
+
+ draw->pipeline.verts = NULL;
+ draw->pipeline.vertex_count = 0;
+}
+
+
+void draw_pipeline_flush( struct draw_context *draw,
+ unsigned flags )
+{
+ draw->pipeline.first->flush( draw->pipeline.first, flags );
+ draw->pipeline.first = draw->pipeline.validate;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h
new file mode 100644
index 0000000000..479250729f
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -0,0 +1,126 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef DRAW_PIPE_H
+#define DRAW_PIPE_H
+
+#include "pipe/p_compiler.h"
+#include "draw_private.h" /* for sizeof(vertex_header) */
+
+
+/**
+ * Basic info for a point/line/triangle primitive.
+ */
+struct prim_header {
+ float det; /**< front/back face determinant */
+ ushort flags;
+ ushort pad;
+ struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */
+};
+
+
+
+/**
+ * Base class for all primitive drawing stages.
+ */
+struct draw_stage
+{
+ struct draw_context *draw; /**< parent context */
+
+ struct draw_stage *next; /**< next stage in pipeline */
+ const char *name; /**< for debugging */
+
+ struct vertex_header **tmp; /**< temp vert storage, such as for clipping */
+ unsigned nr_tmps;
+
+ void (*point)( struct draw_stage *,
+ struct prim_header * );
+
+ void (*line)( struct draw_stage *,
+ struct prim_header * );
+
+ void (*tri)( struct draw_stage *,
+ struct prim_header * );
+
+ void (*flush)( struct draw_stage *,
+ unsigned flags );
+
+ void (*reset_stipple_counter)( struct draw_stage * );
+
+ void (*destroy)( struct draw_stage * );
+};
+
+
+extern struct draw_stage *draw_unfilled_stage( struct draw_context *context );
+extern struct draw_stage *draw_twoside_stage( struct draw_context *context );
+extern struct draw_stage *draw_offset_stage( struct draw_context *context );
+extern struct draw_stage *draw_clip_stage( struct draw_context *context );
+extern struct draw_stage *draw_flatshade_stage( struct draw_context *context );
+extern struct draw_stage *draw_cull_stage( struct draw_context *context );
+extern struct draw_stage *draw_stipple_stage( struct draw_context *context );
+extern struct draw_stage *draw_wide_line_stage( struct draw_context *context );
+extern struct draw_stage *draw_wide_point_stage( struct draw_context *context );
+extern struct draw_stage *draw_validate_stage( struct draw_context *context );
+
+
+extern void draw_free_temp_verts( struct draw_stage *stage );
+extern boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr );
+
+extern void draw_reset_vertex_ids( struct draw_context *draw );
+
+void draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header);
+void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header);
+void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header);
+
+
+
+/**
+ * Get a writeable copy of a vertex.
+ * \param stage drawing stage info
+ * \param vert the vertex to copy (source)
+ * \param idx index into stage's tmp[] array to put the copy (dest)
+ * \return pointer to the copied vertex
+ */
+static INLINE struct vertex_header *
+dup_vert( struct draw_stage *stage,
+ const struct vertex_header *vert,
+ unsigned idx )
+{
+ struct vertex_header *tmp = stage->tmp[idx];
+ const uint vsize = sizeof(struct vertex_header)
+ + stage->draw->vs.num_vs_outputs * 4 * sizeof(float);
+ memcpy(tmp, vert, vsize);
+ tmp->vertex_id = UNDEFINED_VERTEX_ID;
+ return tmp;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
new file mode 100644
index 0000000000..9f956715a2
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -0,0 +1,921 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * AA line stage: AA lines are converted to texture mapped triangles.
+ *
+ * Authors: Brian Paul
+ */
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "draw_context.h"
+#include "draw_private.h"
+#include "draw_pipe.h"
+
+
+/**
+ * Max texture level for the alpha texture used for antialiasing
+ */
+#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */
+
+
+/**
+ * Subclass of pipe_shader_state to carry extra fragment shader info.
+ */
+struct aaline_fragment_shader
+{
+ struct pipe_shader_state state;
+ void *driver_fs;
+ void *aaline_fs;
+ uint sampler_unit;
+ int generic_attrib; /**< texcoord/generic used for texture */
+};
+
+
+/**
+ * Subclass of draw_stage
+ */
+struct aaline_stage
+{
+ struct draw_stage stage;
+
+ float half_line_width;
+
+ /** For AA lines, this is the vertex attrib slot for the new texcoords */
+ uint tex_slot;
+ /** position, not necessarily output zero */
+ uint pos_slot;
+
+ void *sampler_cso;
+ struct pipe_texture *texture;
+ uint num_samplers;
+ uint num_textures;
+
+
+ /*
+ * Currently bound state
+ */
+ struct aaline_fragment_shader *fs;
+ struct {
+ void *sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ } state;
+
+ /*
+ * Driver interface/override functions
+ */
+ void * (*driver_create_fs_state)(struct pipe_context *,
+ const struct pipe_shader_state *);
+ void (*driver_bind_fs_state)(struct pipe_context *, void *);
+ void (*driver_delete_fs_state)(struct pipe_context *, void *);
+
+ void (*driver_bind_sampler_states)(struct pipe_context *, unsigned,
+ void **);
+ void (*driver_set_sampler_textures)(struct pipe_context *, unsigned,
+ struct pipe_texture **);
+
+ struct pipe_context *pipe;
+};
+
+
+
+/**
+ * Subclass of tgsi_transform_context, used for transforming the
+ * user's fragment shader to add the special AA instructions.
+ */
+struct aa_transform_context {
+ struct tgsi_transform_context base;
+ uint tempsUsed; /**< bitmask */
+ int colorOutput; /**< which output is the primary color */
+ uint samplersUsed; /**< bitfield of samplers used */
+ int freeSampler; /** an available sampler for the pstipple */
+ int maxInput, maxGeneric; /**< max input index found */
+ int colorTemp, texTemp; /**< temp registers */
+ boolean firstInstruction;
+};
+
+
+/**
+ * TGSI declaration transform callback.
+ * Look for a free sampler, a free input attrib, and two free temp regs.
+ */
+static void
+aa_transform_decl(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *decl)
+{
+ struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
+
+ if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
+ decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
+ decl->Semantic.SemanticIndex == 0) {
+ aactx->colorOutput = decl->DeclarationRange.First;
+ }
+ else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
+ uint i;
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last; i++) {
+ aactx->samplersUsed |= 1 << i;
+ }
+ }
+ else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ if ((int) decl->DeclarationRange.Last > aactx->maxInput)
+ aactx->maxInput = decl->DeclarationRange.Last;
+ if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
+ (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
+ aactx->maxGeneric = decl->Semantic.SemanticIndex;
+ }
+ }
+ else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+ uint i;
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last; i++) {
+ aactx->tempsUsed |= (1 << i);
+ }
+ }
+
+ ctx->emit_declaration(ctx, decl);
+}
+
+
+/**
+ * Find the lowest zero bit in the given word, or -1 if bitfield is all ones.
+ */
+static int
+free_bit(uint bitfield)
+{
+ int i;
+ for (i = 0; i < 32; i++) {
+ if ((bitfield & (1 << i)) == 0)
+ return i;
+ }
+ return -1;
+}
+
+
+/**
+ * TGSI instruction transform callback.
+ * Replace writes to result.color w/ a temp reg.
+ * Upon END instruction, insert texture sampling code for antialiasing.
+ */
+static void
+aa_transform_inst(struct tgsi_transform_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
+
+ if (aactx->firstInstruction) {
+ /* emit our new declarations before the first instruction */
+
+ struct tgsi_full_declaration decl;
+ uint i;
+
+ /* find free sampler */
+ aactx->freeSampler = free_bit(aactx->samplersUsed);
+ if (aactx->freeSampler >= PIPE_MAX_SAMPLERS)
+ aactx->freeSampler = PIPE_MAX_SAMPLERS - 1;
+
+ /* find two free temp regs */
+ for (i = 0; i < 32; i++) {
+ if ((aactx->tempsUsed & (1 << i)) == 0) {
+ /* found a free temp */
+ if (aactx->colorTemp < 0)
+ aactx->colorTemp = i;
+ else if (aactx->texTemp < 0)
+ aactx->texTemp = i;
+ else
+ break;
+ }
+ }
+ assert(aactx->colorTemp >= 0);
+ assert(aactx->texTemp >= 0);
+
+ /* declare new generic input/texcoord */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ /* XXX this could be linear... */
+ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
+ decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = aactx->maxInput + 1;
+ ctx->emit_declaration(ctx, &decl);
+
+ /* declare new sampler */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_SAMPLER;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = aactx->freeSampler;
+ ctx->emit_declaration(ctx, &decl);
+
+ /* declare new temp regs */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = aactx->texTemp;
+ ctx->emit_declaration(ctx, &decl);
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = aactx->colorTemp;
+ ctx->emit_declaration(ctx, &decl);
+
+ aactx->firstInstruction = FALSE;
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_END &&
+ aactx->colorOutput != -1) {
+ struct tgsi_full_instruction newInst;
+
+ /* TEX */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_TEX;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->freeSampler;
+
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* MOV rgb */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+ newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
+ newInst.Instruction.NumSrcRegs = 1;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* MUL alpha */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+ newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* END */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_END;
+ newInst.Instruction.NumDstRegs = 0;
+ newInst.Instruction.NumSrcRegs = 0;
+ ctx->emit_instruction(ctx, &newInst);
+ }
+ else {
+ /* Not an END instruction.
+ * Look for writes to result.color and replace with colorTemp reg.
+ */
+ uint i;
+
+ for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+ struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+ if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
+ dst->DstRegister.Index == aactx->colorOutput) {
+ dst->DstRegister.File = TGSI_FILE_TEMPORARY;
+ dst->DstRegister.Index = aactx->colorTemp;
+ }
+ }
+
+ ctx->emit_instruction(ctx, inst);
+ }
+}
+
+
+/**
+ * Generate the frag shader we'll use for drawing AA lines.
+ * This will be the user's shader plus some texture/modulate instructions.
+ */
+static boolean
+generate_aaline_fs(struct aaline_stage *aaline)
+{
+ const struct pipe_shader_state *orig_fs = &aaline->fs->state;
+ struct pipe_shader_state aaline_fs;
+ struct aa_transform_context transform;
+
+#define MAX 1000
+
+ aaline_fs = *orig_fs; /* copy to init */
+ aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ if (aaline_fs.tokens == NULL)
+ return FALSE;
+
+ memset(&transform, 0, sizeof(transform));
+ transform.colorOutput = -1;
+ transform.maxInput = -1;
+ transform.maxGeneric = -1;
+ transform.colorTemp = -1;
+ transform.texTemp = -1;
+ transform.firstInstruction = TRUE;
+ transform.base.transform_instruction = aa_transform_inst;
+ transform.base.transform_declaration = aa_transform_decl;
+
+ tgsi_transform_shader(orig_fs->tokens,
+ (struct tgsi_token *) aaline_fs.tokens,
+ MAX, &transform.base);
+
+#if 0 /* DEBUG */
+ tgsi_dump(orig_fs->tokens, 0);
+ tgsi_dump(aaline_fs.tokens, 0);
+#endif
+
+ aaline->fs->sampler_unit = transform.freeSampler;
+
+ aaline->fs->aaline_fs
+ = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs);
+ if (aaline->fs->aaline_fs == NULL)
+ goto fail;
+
+ aaline->fs->generic_attrib = transform.maxGeneric + 1;
+ FREE((void *)aaline_fs.tokens);
+ return TRUE;
+
+fail:
+ FREE((void *)aaline_fs.tokens);
+ return FALSE;
+}
+
+
+/**
+ * Create the texture map we'll use for antialiasing the lines.
+ */
+static boolean
+aaline_create_texture(struct aaline_stage *aaline)
+{
+ struct pipe_context *pipe = aaline->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_texture texTemp;
+ uint level;
+
+ memset(&texTemp, 0, sizeof(texTemp));
+ texTemp.target = PIPE_TEXTURE_2D;
+ texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
+ texTemp.last_level = MAX_TEXTURE_LEVEL;
+ texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL;
+ texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL;
+ texTemp.depth[0] = 1;
+ pf_get_block(texTemp.format, &texTemp.block);
+
+ aaline->texture = screen->texture_create(screen, &texTemp);
+ if (!aaline->texture)
+ return FALSE;
+
+ /* Fill in mipmap images.
+ * Basically each level is solid opaque, except for the outermost
+ * texels which are zero. Special case the 1x1 and 2x2 levels.
+ */
+ for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
+ struct pipe_transfer *transfer;
+ const uint size = aaline->texture->width[level];
+ ubyte *data;
+ uint i, j;
+
+ assert(aaline->texture->width[level] == aaline->texture->height[level]);
+
+ /* This texture is new, no need to flush.
+ */
+ transfer = screen->get_tex_transfer(screen, aaline->texture, 0, level, 0,
+ PIPE_TRANSFER_WRITE, 0, 0, size, size);
+ data = screen->transfer_map(screen, transfer);
+ if (data == NULL)
+ return FALSE;
+
+ for (i = 0; i < size; i++) {
+ for (j = 0; j < size; j++) {
+ ubyte d;
+ if (size == 1) {
+ d = 255;
+ }
+ else if (size == 2) {
+ d = 200; /* tuneable */
+ }
+ else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) {
+ d = 0;
+ }
+ else {
+ d = 255;
+ }
+ data[i * transfer->stride + j] = d;
+ }
+ }
+
+ /* unmap */
+ screen->transfer_unmap(screen, transfer);
+ screen->tex_transfer_destroy(transfer);
+ }
+ return TRUE;
+}
+
+
+/**
+ * Create the sampler CSO that'll be used for antialiasing.
+ * By using a mipmapped texture, we don't have to generate a different
+ * texture image for each line size.
+ */
+static boolean
+aaline_create_sampler(struct aaline_stage *aaline)
+{
+ struct pipe_sampler_state sampler;
+ struct pipe_context *pipe = aaline->pipe;
+
+ memset(&sampler, 0, sizeof(sampler));
+ sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
+ sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+ sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+ sampler.normalized_coords = 1;
+ sampler.min_lod = 0.0f;
+ sampler.max_lod = MAX_TEXTURE_LEVEL;
+
+ aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
+ if (aaline->sampler_cso == NULL)
+ return FALSE;
+
+ return TRUE;
+}
+
+
+/**
+ * When we're about to draw our first AA line in a batch, this function is
+ * called to tell the driver to bind our modified fragment shader.
+ */
+static boolean
+bind_aaline_fragment_shader(struct aaline_stage *aaline)
+{
+ struct draw_context *draw = aaline->stage.draw;
+
+ if (!aaline->fs->aaline_fs &&
+ !generate_aaline_fs(aaline))
+ return FALSE;
+
+ draw->suspend_flushing = TRUE;
+ aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs);
+ draw->suspend_flushing = FALSE;
+
+ return TRUE;
+}
+
+
+
+static INLINE struct aaline_stage *
+aaline_stage( struct draw_stage *stage )
+{
+ return (struct aaline_stage *) stage;
+}
+
+
+/**
+ * Draw a wide line by drawing a quad, using geometry which will
+ * fullfill GL's antialiased line requirements.
+ */
+static void
+aaline_line(struct draw_stage *stage, struct prim_header *header)
+{
+ const struct aaline_stage *aaline = aaline_stage(stage);
+ const float half_width = aaline->half_line_width;
+ struct prim_header tri;
+ struct vertex_header *v[8];
+ uint texPos = aaline->tex_slot;
+ uint posPos = aaline->pos_slot;
+ float *pos, *tex;
+ float dx = header->v[1]->data[posPos][0] - header->v[0]->data[posPos][0];
+ float dy = header->v[1]->data[posPos][1] - header->v[0]->data[posPos][1];
+ double a = atan2(dy, dx);
+ float c_a = (float) cos(a), s_a = (float) sin(a);
+ uint i;
+
+ /* XXX the ends of lines aren't quite perfect yet, but probably passable */
+ dx = 0.5F * half_width;
+ dy = half_width;
+
+ /* allocate/dup new verts */
+ for (i = 0; i < 8; i++) {
+ v[i] = dup_vert(stage, header->v[i/4], i);
+ }
+
+ /*
+ * Quad strip for line from v0 to v1 (*=endpoints):
+ *
+ * 1 3 5 7
+ * +---+---------------------+---+
+ * | |
+ * | *v0 v1* |
+ * | |
+ * +---+---------------------+---+
+ * 0 2 4 6
+ */
+
+ /* new verts */
+ pos = v[0]->data[posPos];
+ pos[0] += (-dx * c_a - dy * s_a);
+ pos[1] += (-dx * s_a + dy * c_a);
+
+ pos = v[1]->data[posPos];
+ pos[0] += (-dx * c_a - -dy * s_a);
+ pos[1] += (-dx * s_a + -dy * c_a);
+
+ pos = v[2]->data[posPos];
+ pos[0] += ( dx * c_a - dy * s_a);
+ pos[1] += ( dx * s_a + dy * c_a);
+
+ pos = v[3]->data[posPos];
+ pos[0] += ( dx * c_a - -dy * s_a);
+ pos[1] += ( dx * s_a + -dy * c_a);
+
+ pos = v[4]->data[posPos];
+ pos[0] += (-dx * c_a - dy * s_a);
+ pos[1] += (-dx * s_a + dy * c_a);
+
+ pos = v[5]->data[posPos];
+ pos[0] += (-dx * c_a - -dy * s_a);
+ pos[1] += (-dx * s_a + -dy * c_a);
+
+ pos = v[6]->data[posPos];
+ pos[0] += ( dx * c_a - dy * s_a);
+ pos[1] += ( dx * s_a + dy * c_a);
+
+ pos = v[7]->data[posPos];
+ pos[0] += ( dx * c_a - -dy * s_a);
+ pos[1] += ( dx * s_a + -dy * c_a);
+
+ /* new texcoords */
+ tex = v[0]->data[texPos];
+ ASSIGN_4V(tex, 0, 0, 0, 1);
+
+ tex = v[1]->data[texPos];
+ ASSIGN_4V(tex, 0, 1, 0, 1);
+
+ tex = v[2]->data[texPos];
+ ASSIGN_4V(tex, .5, 0, 0, 1);
+
+ tex = v[3]->data[texPos];
+ ASSIGN_4V(tex, .5, 1, 0, 1);
+
+ tex = v[4]->data[texPos];
+ ASSIGN_4V(tex, .5, 0, 0, 1);
+
+ tex = v[5]->data[texPos];
+ ASSIGN_4V(tex, .5, 1, 0, 1);
+
+ tex = v[6]->data[texPos];
+ ASSIGN_4V(tex, 1, 0, 0, 1);
+
+ tex = v[7]->data[texPos];
+ ASSIGN_4V(tex, 1, 1, 0, 1);
+
+ /* emit 6 tris for the quad strip */
+ tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0];
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2];
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v[4]; tri.v[1] = v[3]; tri.v[2] = v[2];
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v[5]; tri.v[1] = v[3]; tri.v[2] = v[4];
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v[6]; tri.v[1] = v[5]; tri.v[2] = v[4];
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v[7]; tri.v[1] = v[5]; tri.v[2] = v[6];
+ stage->next->tri( stage->next, &tri );
+}
+
+
+static void
+aaline_first_line(struct draw_stage *stage, struct prim_header *header)
+{
+ auto struct aaline_stage *aaline = aaline_stage(stage);
+ struct draw_context *draw = stage->draw;
+ struct pipe_context *pipe = aaline->pipe;
+ uint num_samplers;
+
+ assert(draw->rasterizer->line_smooth);
+
+ if (draw->rasterizer->line_width <= 3.0)
+ aaline->half_line_width = 1.5f;
+ else
+ aaline->half_line_width = 0.5f * draw->rasterizer->line_width;
+
+ /*
+ * Bind (generate) our fragprog, sampler and texture
+ */
+ if (!bind_aaline_fragment_shader(aaline)) {
+ stage->line = draw_pipe_passthrough_line;
+ stage->line(stage, header);
+ return;
+ }
+
+ /* update vertex attrib info */
+ aaline->tex_slot = draw->vs.num_vs_outputs;
+ aaline->pos_slot = draw->vs.position_output;
+
+ /* advertise the extra post-transformed vertex attribute */
+ draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
+ draw->extra_vp_outputs.semantic_index = aaline->fs->generic_attrib;
+ draw->extra_vp_outputs.slot = aaline->tex_slot;
+
+ /* how many samplers? */
+ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
+ num_samplers = MAX2(aaline->num_textures, aaline->num_samplers);
+ num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1);
+
+ aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso;
+ pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit],
+ aaline->texture);
+
+ draw->suspend_flushing = TRUE;
+ aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler);
+ aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture);
+ draw->suspend_flushing = FALSE;
+
+ /* now really draw first line */
+ stage->line = aaline_line;
+ stage->line(stage, header);
+}
+
+
+static void
+aaline_flush(struct draw_stage *stage, unsigned flags)
+{
+ struct draw_context *draw = stage->draw;
+ struct aaline_stage *aaline = aaline_stage(stage);
+ struct pipe_context *pipe = aaline->pipe;
+
+ stage->line = aaline_first_line;
+ stage->next->flush( stage->next, flags );
+
+ /* restore original frag shader, texture, sampler state */
+ draw->suspend_flushing = TRUE;
+ aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs);
+ aaline->driver_bind_sampler_states(pipe, aaline->num_samplers,
+ aaline->state.sampler);
+ aaline->driver_set_sampler_textures(pipe, aaline->num_textures,
+ aaline->state.texture);
+ draw->suspend_flushing = FALSE;
+
+ draw->extra_vp_outputs.slot = 0;
+}
+
+
+static void
+aaline_reset_stipple_counter(struct draw_stage *stage)
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void
+aaline_destroy(struct draw_stage *stage)
+{
+ struct aaline_stage *aaline = aaline_stage(stage);
+ uint i;
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ pipe_texture_reference(&aaline->state.texture[i], NULL);
+ }
+
+ if (aaline->sampler_cso)
+ aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso);
+
+ if (aaline->texture)
+ pipe_texture_reference(&aaline->texture, NULL);
+
+ draw_free_temp_verts( stage );
+
+ FREE( stage );
+}
+
+
+static struct aaline_stage *
+draw_aaline_stage(struct draw_context *draw)
+{
+ struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage);
+ if (aaline == NULL)
+ return NULL;
+
+ if (!draw_alloc_temp_verts( &aaline->stage, 8 ))
+ goto fail;
+
+ aaline->stage.draw = draw;
+ aaline->stage.name = "aaline";
+ aaline->stage.next = NULL;
+ aaline->stage.point = draw_pipe_passthrough_point;
+ aaline->stage.line = aaline_first_line;
+ aaline->stage.tri = draw_pipe_passthrough_tri;
+ aaline->stage.flush = aaline_flush;
+ aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter;
+ aaline->stage.destroy = aaline_destroy;
+
+ return aaline;
+
+ fail:
+ if (aaline)
+ aaline_destroy(&aaline->stage);
+
+ return NULL;
+}
+
+
+static struct aaline_stage *
+aaline_stage_from_pipe(struct pipe_context *pipe)
+{
+ struct draw_context *draw = (struct draw_context *) pipe->draw;
+ return aaline_stage(draw->pipeline.aaline);
+}
+
+
+/**
+ * This function overrides the driver's create_fs_state() function and
+ * will typically be called by the state tracker.
+ */
+static void *
+aaline_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *fs)
+{
+ struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+ struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader);
+ if (aafs == NULL)
+ return NULL;
+
+ aafs->state = *fs;
+
+ /* pass-through */
+ aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs);
+
+ return aafs;
+}
+
+
+static void
+aaline_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+ struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;
+
+ /* save current */
+ aaline->fs = aafs;
+ /* pass-through */
+ aaline->driver_bind_fs_state(aaline->pipe,
+ (aafs ? aafs->driver_fs : NULL));
+}
+
+
+static void
+aaline_delete_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+ struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;
+ /* pass-through */
+ aaline->driver_delete_fs_state(aaline->pipe, aafs->driver_fs);
+
+ if (aafs->aaline_fs)
+ aaline->driver_delete_fs_state(aaline->pipe, aafs->aaline_fs);
+
+ FREE(aafs);
+}
+
+
+static void
+aaline_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+
+ /* save current */
+ memcpy(aaline->state.sampler, sampler, num * sizeof(void *));
+ aaline->num_samplers = num;
+
+ /* pass-through */
+ aaline->driver_bind_sampler_states(aaline->pipe, num, sampler);
+}
+
+
+static void
+aaline_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num, struct pipe_texture **texture)
+{
+ struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+ uint i;
+
+ /* save current */
+ for (i = 0; i < num; i++) {
+ pipe_texture_reference(&aaline->state.texture[i], texture[i]);
+ }
+ for ( ; i < PIPE_MAX_SAMPLERS; i++) {
+ pipe_texture_reference(&aaline->state.texture[i], NULL);
+ }
+ aaline->num_textures = num;
+
+ /* pass-through */
+ aaline->driver_set_sampler_textures(aaline->pipe, num, texture);
+}
+
+
+/**
+ * Called by drivers that want to install this AA line prim stage
+ * into the draw module's pipeline. This will not be used if the
+ * hardware has native support for AA lines.
+ */
+boolean
+draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
+{
+ struct aaline_stage *aaline;
+
+ pipe->draw = (void *) draw;
+
+ /*
+ * Create / install AA line drawing / prim stage
+ */
+ aaline = draw_aaline_stage( draw );
+ if (!aaline)
+ goto fail;
+
+ aaline->pipe = pipe;
+
+ /* create special texture, sampler state */
+ if (!aaline_create_texture(aaline))
+ goto fail;
+
+ if (!aaline_create_sampler(aaline))
+ goto fail;
+
+ /* save original driver functions */
+ aaline->driver_create_fs_state = pipe->create_fs_state;
+ aaline->driver_bind_fs_state = pipe->bind_fs_state;
+ aaline->driver_delete_fs_state = pipe->delete_fs_state;
+
+ aaline->driver_bind_sampler_states = pipe->bind_sampler_states;
+ aaline->driver_set_sampler_textures = pipe->set_sampler_textures;
+
+ /* override the driver's functions */
+ pipe->create_fs_state = aaline_create_fs_state;
+ pipe->bind_fs_state = aaline_bind_fs_state;
+ pipe->delete_fs_state = aaline_delete_fs_state;
+
+ pipe->bind_sampler_states = aaline_bind_sampler_states;
+ pipe->set_sampler_textures = aaline_set_sampler_textures;
+
+ /* Install once everything is known to be OK:
+ */
+ draw->pipeline.aaline = &aaline->stage;
+
+ return TRUE;
+
+ fail:
+ if (aaline)
+ aaline->stage.destroy( &aaline->stage );
+
+ return FALSE;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
new file mode 100644
index 0000000000..ae1712fe12
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -0,0 +1,884 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * AA point stage: AA points are converted to quads and rendered with a
+ * special fragment shader. Another approach would be to use a texture
+ * map image of a point, but experiments indicate the quality isn't nearly
+ * as good as this approach.
+ *
+ * Note: this looks a lot like draw_aaline.c but there's actually little
+ * if any code that can be shared.
+ *
+ * Authors: Brian Paul
+ */
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw_context.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
+
+
+/*
+ * Enabling NORMALIZE might give _slightly_ better results.
+ * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
+ * d=x*x+y*y. Since we're working with a unit circle, the later seems
+ * close enough and saves some costly instructions.
+ */
+#define NORMALIZE 0
+
+
+/**
+ * Subclass of pipe_shader_state to carry extra fragment shader info.
+ */
+struct aapoint_fragment_shader
+{
+ struct pipe_shader_state state;
+ void *driver_fs; /**< the regular shader */
+ void *aapoint_fs; /**< the aa point-augmented shader */
+ int generic_attrib; /**< The generic input attrib/texcoord we'll use */
+};
+
+
+/**
+ * Subclass of draw_stage
+ */
+struct aapoint_stage
+{
+ struct draw_stage stage;
+
+ int psize_slot;
+ float radius;
+
+ /** this is the vertex attrib slot for the new texcoords */
+ uint tex_slot;
+ uint pos_slot;
+
+ /*
+ * Currently bound state
+ */
+ struct aapoint_fragment_shader *fs;
+
+ /*
+ * Driver interface/override functions
+ */
+ void * (*driver_create_fs_state)(struct pipe_context *,
+ const struct pipe_shader_state *);
+ void (*driver_bind_fs_state)(struct pipe_context *, void *);
+ void (*driver_delete_fs_state)(struct pipe_context *, void *);
+
+ struct pipe_context *pipe;
+};
+
+
+
+/**
+ * Subclass of tgsi_transform_context, used for transforming the
+ * user's fragment shader to add the special AA instructions.
+ */
+struct aa_transform_context {
+ struct tgsi_transform_context base;
+ uint tempsUsed; /**< bitmask */
+ int colorOutput; /**< which output is the primary color */
+ int maxInput, maxGeneric; /**< max input index found */
+ int tmp0, colorTemp; /**< temp registers */
+ boolean firstInstruction;
+};
+
+
+/**
+ * TGSI declaration transform callback.
+ * Look for two free temp regs and available input reg for new texcoords.
+ */
+static void
+aa_transform_decl(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *decl)
+{
+ struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
+
+ if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
+ decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
+ decl->Semantic.SemanticIndex == 0) {
+ aactx->colorOutput = decl->DeclarationRange.First;
+ }
+ else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ if ((int) decl->DeclarationRange.Last > aactx->maxInput)
+ aactx->maxInput = decl->DeclarationRange.Last;
+ if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
+ (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
+ aactx->maxGeneric = decl->Semantic.SemanticIndex;
+ }
+ }
+ else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+ uint i;
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last; i++) {
+ aactx->tempsUsed |= (1 << i);
+ }
+ }
+
+ ctx->emit_declaration(ctx, decl);
+}
+
+
+/**
+ * TGSI instruction transform callback.
+ * Replace writes to result.color w/ a temp reg.
+ * Upon END instruction, insert texture sampling code for antialiasing.
+ */
+static void
+aa_transform_inst(struct tgsi_transform_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
+ struct tgsi_full_instruction newInst;
+
+ if (aactx->firstInstruction) {
+ /* emit our new declarations before the first instruction */
+
+ struct tgsi_full_declaration decl;
+ const int texInput = aactx->maxInput + 1;
+ int tmp0;
+ uint i;
+
+ /* find two free temp regs */
+ for (i = 0; i < 32; i++) {
+ if ((aactx->tempsUsed & (1 << i)) == 0) {
+ /* found a free temp */
+ if (aactx->tmp0 < 0)
+ aactx->tmp0 = i;
+ else if (aactx->colorTemp < 0)
+ aactx->colorTemp = i;
+ else
+ break;
+ }
+ }
+
+ assert(aactx->colorTemp != aactx->tmp0);
+
+ tmp0 = aactx->tmp0;
+
+ /* declare new generic input/texcoord */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ /* XXX this could be linear... */
+ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
+ decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = texInput;
+ ctx->emit_declaration(ctx, &decl);
+
+ /* declare new temp regs */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = tmp0;
+ ctx->emit_declaration(ctx, &decl);
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = aactx->colorTemp;
+ ctx->emit_declaration(ctx, &decl);
+
+ aactx->firstInstruction = FALSE;
+
+
+ /*
+ * Emit code to compute fragment coverage, kill if outside point radius
+ *
+ * Temp reg0 usage:
+ * t0.x = distance of fragment from center point
+ * t0.y = boolean, is t0.x > 1.0, also misc temp usage
+ * t0.z = temporary for computing 1/(1-k) value
+ * t0.w = final coverage value
+ */
+
+ /* MUL t0.xy, tex, tex; # compute x^2, y^2 */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+ ctx->emit_instruction(ctx, &newInst);
+
+#if NORMALIZE /* OPTIONAL normalization of length */
+ /* RSQ t0.x, t0.x; */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
+ newInst.Instruction.NumSrcRegs = 1;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* RCP t0.x, t0.x; */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
+ newInst.Instruction.NumSrcRegs = 1;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ ctx->emit_instruction(ctx, &newInst);
+#endif
+
+ /* SGT t0.y, t0.xxxx, tex.wwww; # bool b = d > 1 (NOTE tex.w == 1) */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
+ newInst.Instruction.NumDstRegs = 0;
+ newInst.Instruction.NumSrcRegs = 1;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
+ ctx->emit_instruction(ctx, &newInst);
+
+
+ /* compute coverage factor = (1-d)/(1-k) */
+
+ /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* RCP t0.z, t0.z; # t0.z = 1 / m */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
+ newInst.Instruction.NumSrcRegs = 1;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* SUB t0.y, 1, t0.x; # d = 1 - d */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* MUL t0.w, t0.y, t0.z; # coverage = d * m */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* CMP t0.w, -t0.y, tex.w, t0.w;
+ * # if -t0.y < 0 then
+ * t0.w = 1
+ * else
+ * t0.w = t0.w
+ */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+ newInst.Instruction.NumSrcRegs = 3;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
+ ctx->emit_instruction(ctx, &newInst);
+
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
+ /* add alpha modulation code at tail of program */
+
+ /* MOV result.color.xyz, colorTemp; */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+ newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
+ newInst.Instruction.NumSrcRegs = 1;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* MUL result.color.w, colorTemp, tmp0.w; */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+ newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0;
+ ctx->emit_instruction(ctx, &newInst);
+ }
+ else {
+ /* Not an END instruction.
+ * Look for writes to result.color and replace with colorTemp reg.
+ */
+ uint i;
+
+ for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+ struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+ if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
+ dst->DstRegister.Index == aactx->colorOutput) {
+ dst->DstRegister.File = TGSI_FILE_TEMPORARY;
+ dst->DstRegister.Index = aactx->colorTemp;
+ }
+ }
+ }
+
+ ctx->emit_instruction(ctx, inst);
+}
+
+
+/**
+ * Generate the frag shader we'll use for drawing AA points.
+ * This will be the user's shader plus some texture/modulate instructions.
+ */
+static boolean
+generate_aapoint_fs(struct aapoint_stage *aapoint)
+{
+ const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
+ struct pipe_shader_state aapoint_fs;
+ struct aa_transform_context transform;
+
+#define MAX 1000
+
+ aapoint_fs = *orig_fs; /* copy to init */
+ aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ if (aapoint_fs.tokens == NULL)
+ return FALSE;
+
+ memset(&transform, 0, sizeof(transform));
+ transform.colorOutput = -1;
+ transform.maxInput = -1;
+ transform.maxGeneric = -1;
+ transform.colorTemp = -1;
+ transform.tmp0 = -1;
+ transform.firstInstruction = TRUE;
+ transform.base.transform_instruction = aa_transform_inst;
+ transform.base.transform_declaration = aa_transform_decl;
+
+ tgsi_transform_shader(orig_fs->tokens,
+ (struct tgsi_token *) aapoint_fs.tokens,
+ MAX, &transform.base);
+
+#if 0 /* DEBUG */
+ printf("draw_aapoint, orig shader:\n");
+ tgsi_dump(orig_fs->tokens, 0);
+ printf("draw_aapoint, new shader:\n");
+ tgsi_dump(aapoint_fs.tokens, 0);
+#endif
+
+ aapoint->fs->aapoint_fs
+ = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs);
+ if (aapoint->fs->aapoint_fs == NULL)
+ goto fail;
+
+ aapoint->fs->generic_attrib = transform.maxGeneric + 1;
+ FREE((void *)aapoint_fs.tokens);
+ return TRUE;
+
+fail:
+ FREE((void *)aapoint_fs.tokens);
+ return FALSE;
+}
+
+
+/**
+ * When we're about to draw our first AA point in a batch, this function is
+ * called to tell the driver to bind our modified fragment shader.
+ */
+static boolean
+bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
+{
+ struct draw_context *draw = aapoint->stage.draw;
+
+ if (!aapoint->fs->aapoint_fs &&
+ !generate_aapoint_fs(aapoint))
+ return FALSE;
+
+ draw->suspend_flushing = TRUE;
+ aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs);
+ draw->suspend_flushing = FALSE;
+
+ return TRUE;
+}
+
+
+
+static INLINE struct aapoint_stage *
+aapoint_stage( struct draw_stage *stage )
+{
+ return (struct aapoint_stage *) stage;
+}
+
+
+
+
+/**
+ * Draw an AA point by drawing a quad.
+ */
+static void
+aapoint_point(struct draw_stage *stage, struct prim_header *header)
+{
+ const struct aapoint_stage *aapoint = aapoint_stage(stage);
+ struct prim_header tri;
+ struct vertex_header *v[4];
+ uint texPos = aapoint->tex_slot;
+ uint pos_slot = aapoint->pos_slot;
+ float radius, *pos, *tex;
+ uint i;
+ float k;
+
+ if (aapoint->psize_slot >= 0) {
+ radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
+ }
+ else {
+ radius = aapoint->radius;
+ }
+
+ /*
+ * Note: the texcoords (generic attrib, really) we use are special:
+ * The S and T components simply vary from -1 to +1.
+ * The R component is k, below.
+ * The Q component is 1.0 and will used as a handy constant in the
+ * fragment shader.
+ */
+
+ /*
+ * k is the threshold distance from the point's center at which
+ * we begin alpha attenuation (the coverage value).
+ * Operating within a unit circle, we'll compute the fragment's
+ * distance 'd' from the center point using the texcoords.
+ * IF d > 1.0 THEN
+ * KILL fragment
+ * ELSE IF d > k THEN
+ * compute coverage in [0,1] proportional to d in [k, 1].
+ * ELSE
+ * coverage = 1.0; // full coverage
+ * ENDIF
+ *
+ * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
+ * avoid using IF/ELSE/ENDIF TGSI opcodes.
+ */
+
+#if !NORMALIZE
+ k = 1.0f / radius;
+ k = 1.0f - 2.0f * k + k * k;
+#else
+ k = 1.0f - 1.0f / radius;
+#endif
+
+ /* allocate/dup new verts */
+ for (i = 0; i < 4; i++) {
+ v[i] = dup_vert(stage, header->v[0], i);
+ }
+
+ /* new verts */
+ pos = v[0]->data[pos_slot];
+ pos[0] -= radius;
+ pos[1] -= radius;
+
+ pos = v[1]->data[pos_slot];
+ pos[0] += radius;
+ pos[1] -= radius;
+
+ pos = v[2]->data[pos_slot];
+ pos[0] += radius;
+ pos[1] += radius;
+
+ pos = v[3]->data[pos_slot];
+ pos[0] -= radius;
+ pos[1] += radius;
+
+ /* new texcoords */
+ tex = v[0]->data[texPos];
+ ASSIGN_4V(tex, -1, -1, k, 1);
+
+ tex = v[1]->data[texPos];
+ ASSIGN_4V(tex, 1, -1, k, 1);
+
+ tex = v[2]->data[texPos];
+ ASSIGN_4V(tex, 1, 1, k, 1);
+
+ tex = v[3]->data[texPos];
+ ASSIGN_4V(tex, -1, 1, k, 1);
+
+ /* emit 2 tris for the quad strip */
+ tri.v[0] = v[0];
+ tri.v[1] = v[1];
+ tri.v[2] = v[2];
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v[0];
+ tri.v[1] = v[2];
+ tri.v[2] = v[3];
+ stage->next->tri( stage->next, &tri );
+}
+
+
+static void
+aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
+{
+ auto struct aapoint_stage *aapoint = aapoint_stage(stage);
+ struct draw_context *draw = stage->draw;
+
+ assert(draw->rasterizer->point_smooth);
+
+ if (draw->rasterizer->point_size <= 2.0)
+ aapoint->radius = 1.0;
+ else
+ aapoint->radius = 0.5f * draw->rasterizer->point_size;
+
+ /*
+ * Bind (generate) our fragprog.
+ */
+ bind_aapoint_fragment_shader(aapoint);
+
+ /* update vertex attrib info */
+ aapoint->tex_slot = draw->vs.num_vs_outputs;
+ assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
+
+ aapoint->pos_slot = draw->vs.position_output;
+
+ draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
+ draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib;
+ draw->extra_vp_outputs.slot = aapoint->tex_slot;
+
+ /* find psize slot in post-transform vertex */
+ aapoint->psize_slot = -1;
+ if (draw->rasterizer->point_size_per_vertex) {
+ /* find PSIZ vertex output */
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ uint i;
+ for (i = 0; i < vs->info.num_outputs; i++) {
+ if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
+ aapoint->psize_slot = i;
+ break;
+ }
+ }
+ }
+
+ /* now really draw first point */
+ stage->point = aapoint_point;
+ stage->point(stage, header);
+}
+
+
+static void
+aapoint_flush(struct draw_stage *stage, unsigned flags)
+{
+ struct draw_context *draw = stage->draw;
+ struct aapoint_stage *aapoint = aapoint_stage(stage);
+ struct pipe_context *pipe = aapoint->pipe;
+
+ stage->point = aapoint_first_point;
+ stage->next->flush( stage->next, flags );
+
+ /* restore original frag shader */
+ draw->suspend_flushing = TRUE;
+ aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
+ draw->suspend_flushing = FALSE;
+
+ draw->extra_vp_outputs.slot = 0;
+}
+
+
+static void
+aapoint_reset_stipple_counter(struct draw_stage *stage)
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void
+aapoint_destroy(struct draw_stage *stage)
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+static struct aapoint_stage *
+draw_aapoint_stage(struct draw_context *draw)
+{
+ struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
+ if (aapoint == NULL)
+ goto fail;
+
+ if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
+ goto fail;
+
+ aapoint->stage.draw = draw;
+ aapoint->stage.name = "aapoint";
+ aapoint->stage.next = NULL;
+ aapoint->stage.point = aapoint_first_point;
+ aapoint->stage.line = draw_pipe_passthrough_line;
+ aapoint->stage.tri = draw_pipe_passthrough_tri;
+ aapoint->stage.flush = aapoint_flush;
+ aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
+ aapoint->stage.destroy = aapoint_destroy;
+
+ return aapoint;
+
+ fail:
+ if (aapoint)
+ aapoint_destroy(&aapoint->stage);
+
+ return NULL;
+
+}
+
+
+static struct aapoint_stage *
+aapoint_stage_from_pipe(struct pipe_context *pipe)
+{
+ struct draw_context *draw = (struct draw_context *) pipe->draw;
+ return aapoint_stage(draw->pipeline.aapoint);
+}
+
+
+/**
+ * This function overrides the driver's create_fs_state() function and
+ * will typically be called by the state tracker.
+ */
+static void *
+aapoint_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *fs)
+{
+ struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
+ struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
+ if (aafs == NULL)
+ return NULL;
+
+ aafs->state = *fs;
+
+ /* pass-through */
+ aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
+
+ return aafs;
+}
+
+
+static void
+aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
+ struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
+ /* save current */
+ aapoint->fs = aafs;
+ /* pass-through */
+ aapoint->driver_bind_fs_state(aapoint->pipe,
+ (aafs ? aafs->driver_fs : NULL));
+}
+
+
+static void
+aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
+ struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
+
+ /* pass-through */
+ aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs);
+
+ if (aafs->aapoint_fs)
+ aapoint->driver_delete_fs_state(aapoint->pipe, aafs->aapoint_fs);
+
+ FREE(aafs);
+}
+
+
+/**
+ * Called by drivers that want to install this AA point prim stage
+ * into the draw module's pipeline. This will not be used if the
+ * hardware has native support for AA points.
+ */
+boolean
+draw_install_aapoint_stage(struct draw_context *draw,
+ struct pipe_context *pipe)
+{
+ struct aapoint_stage *aapoint;
+
+ pipe->draw = (void *) draw;
+
+ /*
+ * Create / install AA point drawing / prim stage
+ */
+ aapoint = draw_aapoint_stage( draw );
+ if (aapoint == NULL)
+ goto fail;
+
+ aapoint->pipe = pipe;
+
+ /* save original driver functions */
+ aapoint->driver_create_fs_state = pipe->create_fs_state;
+ aapoint->driver_bind_fs_state = pipe->bind_fs_state;
+ aapoint->driver_delete_fs_state = pipe->delete_fs_state;
+
+ /* override the driver's functions */
+ pipe->create_fs_state = aapoint_create_fs_state;
+ pipe->bind_fs_state = aapoint_bind_fs_state;
+ pipe->delete_fs_state = aapoint_delete_fs_state;
+
+ draw->pipeline.aapoint = &aapoint->stage;
+
+ return TRUE;
+
+ fail:
+ if (aapoint)
+ aapoint->stage.destroy( &aapoint->stage );
+
+ return FALSE;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
new file mode 100644
index 0000000000..0670268a19
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -0,0 +1,516 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Clipping stage
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_vs.h"
+#include "draw_pipe.h"
+
+
+#ifndef IS_NEGATIVE
+#define IS_NEGATIVE(X) ((X) < 0.0)
+#endif
+
+#ifndef DIFFERENT_SIGNS
+#define DIFFERENT_SIGNS(x, y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F)
+#endif
+
+#ifndef MAX_CLIPPED_VERTICES
+#define MAX_CLIPPED_VERTICES ((2 * (6 + PIPE_MAX_CLIP_PLANES))+1)
+#endif
+
+
+
+struct clipper {
+ struct draw_stage stage; /**< base class */
+
+ /* Basically duplicate some of the flatshading logic here:
+ */
+ boolean flat;
+ uint num_color_attribs;
+ uint color_attribs[4]; /* front/back primary/secondary colors */
+
+ float (*plane)[4];
+};
+
+
+/* This is a bit confusing:
+ */
+static INLINE struct clipper *clipper_stage( struct draw_stage *stage )
+{
+ return (struct clipper *)stage;
+}
+
+
+#define LINTERP(T, OUT, IN) ((OUT) + (T) * ((IN) - (OUT)))
+
+
+/* All attributes are float[4], so this is easy:
+ */
+static void interp_attr( float *fdst,
+ float t,
+ const float *fin,
+ const float *fout )
+{
+ fdst[0] = LINTERP( t, fout[0], fin[0] );
+ fdst[1] = LINTERP( t, fout[1], fin[1] );
+ fdst[2] = LINTERP( t, fout[2], fin[2] );
+ fdst[3] = LINTERP( t, fout[3], fin[3] );
+}
+
+static void copy_colors( struct draw_stage *stage,
+ struct vertex_header *dst,
+ const struct vertex_header *src )
+{
+ const struct clipper *clipper = clipper_stage(stage);
+ uint i;
+ for (i = 0; i < clipper->num_color_attribs; i++) {
+ const uint attr = clipper->color_attribs[i];
+ COPY_4FV(dst->data[attr], src->data[attr]);
+ }
+}
+
+
+
+/* Interpolate between two vertices to produce a third.
+ */
+static void interp( const struct clipper *clip,
+ struct vertex_header *dst,
+ float t,
+ const struct vertex_header *out,
+ const struct vertex_header *in )
+{
+ const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs;
+ const unsigned pos_attr = clip->stage.draw->vs.position_output;
+ unsigned j;
+
+ /* Vertex header.
+ */
+ {
+ dst->clipmask = 0;
+ dst->edgeflag = 0; /* will get overwritten later */
+ dst->pad = 0;
+ dst->vertex_id = UNDEFINED_VERTEX_ID;
+ }
+
+ /* Clip coordinates: interpolate normally
+ */
+ {
+ interp_attr(dst->clip, t, in->clip, out->clip);
+ }
+
+ /* Do the projective divide and insert window coordinates:
+ */
+ {
+ const float *pos = dst->clip;
+ const float *scale = clip->stage.draw->viewport.scale;
+ const float *trans = clip->stage.draw->viewport.translate;
+ const float oow = 1.0f / pos[3];
+
+ dst->data[pos_attr][0] = pos[0] * oow * scale[0] + trans[0];
+ dst->data[pos_attr][1] = pos[1] * oow * scale[1] + trans[1];
+ dst->data[pos_attr][2] = pos[2] * oow * scale[2] + trans[2];
+ dst->data[pos_attr][3] = oow;
+ }
+
+ /* Other attributes
+ */
+ for (j = 0; j < nr_attrs; j++) {
+ if (j != pos_attr)
+ interp_attr(dst->data[j], t, in->data[j], out->data[j]);
+ }
+}
+
+
+static void emit_poly( struct draw_stage *stage,
+ struct vertex_header **inlist,
+ unsigned n,
+ const struct prim_header *origPrim)
+{
+ struct prim_header header;
+ unsigned i;
+
+ const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
+ /* later stages may need the determinant, but only the sign matters */
+ header.det = origPrim->det;
+ header.flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+ header.pad = 0;
+
+ for (i = 2; i < n; i++, header.flags = edge_middle) {
+ header.v[0] = inlist[i-1];
+ header.v[1] = inlist[i];
+ header.v[2] = inlist[0]; /* keep in v[2] for flatshading */
+
+ if (i == n-1)
+ header.flags |= edge_last;
+
+ if (0) {
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
+ uint j, k;
+ debug_printf("Clipped tri:\n");
+ for (j = 0; j < 3; j++) {
+ for (k = 0; k < vs->info.num_outputs; k++) {
+ debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k,
+ header.v[j]->data[k][0],
+ header.v[j]->data[k][1],
+ header.v[j]->data[k][2],
+ header.v[j]->data[k][3]);
+ }
+ }
+ }
+
+ stage->next->tri( stage->next, &header );
+ }
+}
+
+static INLINE float
+dot4(const float *a, const float *b)
+{
+ return (a[0]*b[0] +
+ a[1]*b[1] +
+ a[2]*b[2] +
+ a[3]*b[3]);
+}
+
+
+/* Clip a triangle against the viewport and user clip planes.
+ */
+static void
+do_clip_tri( struct draw_stage *stage,
+ struct prim_header *header,
+ unsigned clipmask )
+{
+ struct clipper *clipper = clipper_stage( stage );
+ struct vertex_header *a[MAX_CLIPPED_VERTICES];
+ struct vertex_header *b[MAX_CLIPPED_VERTICES];
+ struct vertex_header **inlist = a;
+ struct vertex_header **outlist = b;
+ unsigned tmpnr = 0;
+ unsigned n = 3;
+ unsigned i;
+
+ inlist[0] = header->v[0];
+ inlist[1] = header->v[1];
+ inlist[2] = header->v[2];
+
+ while (clipmask && n >= 3) {
+ const unsigned plane_idx = ffs(clipmask)-1;
+ const float *plane = clipper->plane[plane_idx];
+ struct vertex_header *vert_prev = inlist[0];
+ float dp_prev = dot4( vert_prev->clip, plane );
+ unsigned outcount = 0;
+
+ clipmask &= ~(1<<plane_idx);
+
+ inlist[n] = inlist[0]; /* prevent rotation of vertices */
+
+ for (i = 1; i <= n; i++) {
+ struct vertex_header *vert = inlist[i];
+
+ float dp = dot4( vert->clip, plane );
+
+ if (!IS_NEGATIVE(dp_prev)) {
+ outlist[outcount++] = vert_prev;
+ }
+
+ if (DIFFERENT_SIGNS(dp, dp_prev)) {
+ struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++];
+ outlist[outcount++] = new_vert;
+
+ if (IS_NEGATIVE(dp)) {
+ /* Going out of bounds. Avoid division by zero as we
+ * know dp != dp_prev from DIFFERENT_SIGNS, above.
+ */
+ float t = dp / (dp - dp_prev);
+ interp( clipper, new_vert, t, vert, vert_prev );
+
+ /* Force edgeflag true in this case:
+ */
+ new_vert->edgeflag = 1;
+ } else {
+ /* Coming back in.
+ */
+ float t = dp_prev / (dp_prev - dp);
+ interp( clipper, new_vert, t, vert_prev, vert );
+
+ /* Copy starting vert's edgeflag:
+ */
+ new_vert->edgeflag = vert_prev->edgeflag;
+ }
+ }
+
+ vert_prev = vert;
+ dp_prev = dp;
+ }
+
+ {
+ struct vertex_header **tmp = inlist;
+ inlist = outlist;
+ outlist = tmp;
+ n = outcount;
+ }
+ }
+
+ /* If flat-shading, copy color to new provoking vertex.
+ */
+ if (clipper->flat && inlist[0] != header->v[2]) {
+ if (1) {
+ inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
+ }
+
+ copy_colors(stage, inlist[0], header->v[2]);
+ }
+
+
+
+ /* Emit the polygon as triangles to the setup stage:
+ */
+ if (n >= 3)
+ emit_poly( stage, inlist, n, header );
+}
+
+
+/* Clip a line against the viewport and user clip planes.
+ */
+static void
+do_clip_line( struct draw_stage *stage,
+ struct prim_header *header,
+ unsigned clipmask )
+{
+ const struct clipper *clipper = clipper_stage( stage );
+ struct vertex_header *v0 = header->v[0];
+ struct vertex_header *v1 = header->v[1];
+ const float *pos0 = v0->clip;
+ const float *pos1 = v1->clip;
+ float t0 = 0.0F;
+ float t1 = 0.0F;
+ struct prim_header newprim;
+
+ while (clipmask) {
+ const unsigned plane_idx = ffs(clipmask)-1;
+ const float *plane = clipper->plane[plane_idx];
+ const float dp0 = dot4( pos0, plane );
+ const float dp1 = dot4( pos1, plane );
+
+ if (dp1 < 0.0F) {
+ float t = dp1 / (dp1 - dp0);
+ t1 = MAX2(t1, t);
+ }
+
+ if (dp0 < 0.0F) {
+ float t = dp0 / (dp0 - dp1);
+ t0 = MAX2(t0, t);
+ }
+
+ if (t0 + t1 >= 1.0F)
+ return; /* discard */
+
+ clipmask &= ~(1 << plane_idx); /* turn off this plane's bit */
+ }
+
+ if (v0->clipmask) {
+ interp( clipper, stage->tmp[0], t0, v0, v1 );
+
+ if (clipper->flat)
+ copy_colors(stage, stage->tmp[0], v0);
+
+ newprim.v[0] = stage->tmp[0];
+ }
+ else {
+ newprim.v[0] = v0;
+ }
+
+ if (v1->clipmask) {
+ interp( clipper, stage->tmp[1], t1, v1, v0 );
+ newprim.v[1] = stage->tmp[1];
+ }
+ else {
+ newprim.v[1] = v1;
+ }
+
+ stage->next->line( stage->next, &newprim );
+}
+
+
+static void
+clip_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ if (header->v[0]->clipmask == 0)
+ stage->next->point( stage->next, header );
+}
+
+
+static void
+clip_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ unsigned clipmask = (header->v[0]->clipmask |
+ header->v[1]->clipmask);
+
+ if (clipmask == 0) {
+ /* no clipping needed */
+ stage->next->line( stage->next, header );
+ }
+ else if ((header->v[0]->clipmask &
+ header->v[1]->clipmask) == 0) {
+ do_clip_line(stage, header, clipmask);
+ }
+ /* else, totally clipped */
+}
+
+
+static void
+clip_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ unsigned clipmask = (header->v[0]->clipmask |
+ header->v[1]->clipmask |
+ header->v[2]->clipmask);
+
+ if (clipmask == 0) {
+ /* no clipping needed */
+ stage->next->tri( stage->next, header );
+ }
+ else if ((header->v[0]->clipmask &
+ header->v[1]->clipmask &
+ header->v[2]->clipmask) == 0) {
+ do_clip_tri(stage, header, clipmask);
+ }
+}
+
+/* Update state. Could further delay this until we hit the first
+ * primitive that really requires clipping.
+ */
+static void
+clip_init_state( struct draw_stage *stage )
+{
+ struct clipper *clipper = clipper_stage( stage );
+
+ clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE;
+
+ if (clipper->flat) {
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
+ uint i;
+
+ clipper->num_color_attribs = 0;
+ for (i = 0; i < vs->info.num_outputs; i++) {
+ if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR ||
+ vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
+ clipper->color_attribs[clipper->num_color_attribs++] = i;
+ }
+ }
+ }
+
+ stage->tri = clip_tri;
+ stage->line = clip_line;
+}
+
+
+
+static void clip_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ clip_init_state( stage );
+ stage->tri( stage, header );
+}
+
+static void clip_first_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ clip_init_state( stage );
+ stage->line( stage, header );
+}
+
+
+static void clip_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ stage->tri = clip_first_tri;
+ stage->line = clip_first_line;
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void clip_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void clip_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Allocate a new clipper stage.
+ * \return pointer to new stage object
+ */
+struct draw_stage *draw_clip_stage( struct draw_context *draw )
+{
+ struct clipper *clipper = CALLOC_STRUCT(clipper);
+ if (clipper == NULL)
+ goto fail;
+
+ if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ))
+ goto fail;
+
+ clipper->stage.draw = draw;
+ clipper->stage.name = "clipper";
+ clipper->stage.point = clip_point;
+ clipper->stage.line = clip_first_line;
+ clipper->stage.tri = clip_first_tri;
+ clipper->stage.flush = clip_flush;
+ clipper->stage.reset_stipple_counter = clip_reset_stipple_counter;
+ clipper->stage.destroy = clip_destroy;
+
+ clipper->plane = draw->plane;
+
+ return &clipper->stage;
+
+ fail:
+ if (clipper)
+ clipper->stage.destroy( &clipper->stage );
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
new file mode 100644
index 0000000000..0a70483858
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -0,0 +1,148 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Drawing stage for polygon culling
+ */
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "util/u_memory.h"
+#include "pipe/p_defines.h"
+#include "draw_pipe.h"
+
+
+struct cull_stage {
+ struct draw_stage stage;
+ unsigned winding; /**< which winding(s) to cull (one of PIPE_WINDING_x) */
+};
+
+
+static INLINE struct cull_stage *cull_stage( struct draw_stage *stage )
+{
+ return (struct cull_stage *)stage;
+}
+
+
+
+
+static void cull_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ const unsigned pos = stage->draw->vs.position_output;
+
+ /* Window coords: */
+ const float *v0 = header->v[0]->data[pos];
+ const float *v1 = header->v[1]->data[pos];
+ const float *v2 = header->v[2]->data[pos];
+
+ /* edge vectors e = v0 - v2, f = v1 - v2 */
+ const float ex = v0[0] - v2[0];
+ const float ey = v0[1] - v2[1];
+ const float fx = v1[0] - v2[0];
+ const float fy = v1[1] - v2[1];
+
+ /* det = cross(e,f).z */
+ header->det = ex * fy - ey * fx;
+
+ if (header->det != 0) {
+ /* if (det < 0 then Z points toward camera and triangle is
+ * counter-clockwise winding.
+ */
+ unsigned winding = (header->det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
+
+ if ((winding & cull_stage(stage)->winding) == 0) {
+ /* triangle is not culled, pass to next stage */
+ stage->next->tri( stage->next, header );
+ }
+ }
+}
+
+static void cull_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct cull_stage *cull = cull_stage(stage);
+
+ cull->winding = stage->draw->rasterizer->cull_mode;
+
+ stage->tri = cull_tri;
+ stage->tri( stage, header );
+}
+
+
+
+static void cull_flush( struct draw_stage *stage, unsigned flags )
+{
+ stage->tri = cull_first_tri;
+ stage->next->flush( stage->next, flags );
+}
+
+static void cull_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void cull_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create a new polygon culling stage.
+ */
+struct draw_stage *draw_cull_stage( struct draw_context *draw )
+{
+ struct cull_stage *cull = CALLOC_STRUCT(cull_stage);
+ if (cull == NULL)
+ goto fail;
+
+ if (!draw_alloc_temp_verts( &cull->stage, 0 ))
+ goto fail;
+
+ cull->stage.draw = draw;
+ cull->stage.name = "cull";
+ cull->stage.next = NULL;
+ cull->stage.point = draw_pipe_passthrough_point;
+ cull->stage.line = draw_pipe_passthrough_line;
+ cull->stage.tri = cull_first_tri;
+ cull->stage.flush = cull_flush;
+ cull->stage.reset_stipple_counter = cull_reset_stipple_counter;
+ cull->stage.destroy = cull_destroy;
+
+ return &cull->stage;
+
+ fail:
+ if (cull)
+ cull->stage.destroy( &cull->stage );
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
new file mode 100644
index 0000000000..34afb1a0b6
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -0,0 +1,282 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
+
+
+/** subclass of draw_stage */
+struct flat_stage
+{
+ struct draw_stage stage;
+
+ uint num_color_attribs;
+ uint color_attribs[2]; /* front/back primary colors */
+
+ uint num_spec_attribs;
+ uint spec_attribs[2]; /* front/back secondary colors */
+};
+
+#define COPY_3FV( DST, SRC ) \
+do { \
+ (DST)[0] = (SRC)[0]; \
+ (DST)[1] = (SRC)[1]; \
+ (DST)[2] = (SRC)[2]; \
+} while (0)
+
+
+static INLINE struct flat_stage *
+flat_stage(struct draw_stage *stage)
+{
+ return (struct flat_stage *) stage;
+}
+
+
+/** Copy all the color attributes from 'src' vertex to 'dst' vertex */
+static INLINE void copy_colors( struct draw_stage *stage,
+ struct vertex_header *dst,
+ const struct vertex_header *src )
+{
+ const struct flat_stage *flat = flat_stage(stage);
+ uint i;
+
+ for (i = 0; i < flat->num_color_attribs; i++) {
+ const uint attr = flat->color_attribs[i];
+ COPY_4FV(dst->data[attr], src->data[attr]);
+ }
+
+ for (i = 0; i < flat->num_spec_attribs; i++) {
+ const uint attr = flat->spec_attribs[i];
+ COPY_3FV(dst->data[attr], src->data[attr]);
+ }
+}
+
+
+/** Copy all the color attributes from src vertex to dst0 & dst1 vertices */
+static INLINE void copy_colors2( struct draw_stage *stage,
+ struct vertex_header *dst0,
+ struct vertex_header *dst1,
+ const struct vertex_header *src )
+{
+ const struct flat_stage *flat = flat_stage(stage);
+ uint i;
+ for (i = 0; i < flat->num_color_attribs; i++) {
+ const uint attr = flat->color_attribs[i];
+ COPY_4FV(dst0->data[attr], src->data[attr]);
+ COPY_4FV(dst1->data[attr], src->data[attr]);
+ }
+
+ for (i = 0; i < flat->num_spec_attribs; i++) {
+ const uint attr = flat->spec_attribs[i];
+ COPY_3FV(dst0->data[attr], src->data[attr]);
+ COPY_3FV(dst1->data[attr], src->data[attr]);
+ }
+}
+
+
+/**
+ * Flatshade tri. Required for clipping and when unfilled tris are
+ * active, otherwise handled by hardware.
+ */
+static void flatshade_tri_0( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct prim_header tmp;
+
+ tmp.det = header->det;
+ tmp.flags = header->flags;
+ tmp.pad = header->pad;
+ tmp.v[0] = header->v[0];
+ tmp.v[1] = dup_vert(stage, header->v[1], 0);
+ tmp.v[2] = dup_vert(stage, header->v[2], 1);
+
+ copy_colors2(stage, tmp.v[1], tmp.v[2], tmp.v[0]);
+
+ stage->next->tri( stage->next, &tmp );
+}
+
+
+static void flatshade_tri_2( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct prim_header tmp;
+
+ tmp.det = header->det;
+ tmp.flags = header->flags;
+ tmp.pad = header->pad;
+ tmp.v[0] = dup_vert(stage, header->v[0], 0);
+ tmp.v[1] = dup_vert(stage, header->v[1], 1);
+ tmp.v[2] = header->v[2];
+
+ copy_colors2(stage, tmp.v[0], tmp.v[1], tmp.v[2]);
+
+ stage->next->tri( stage->next, &tmp );
+}
+
+
+
+
+
+/**
+ * Flatshade line. Required for clipping.
+ */
+static void flatshade_line_0( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct prim_header tmp;
+
+ tmp.v[0] = header->v[0];
+ tmp.v[1] = dup_vert(stage, header->v[1], 0);
+
+ copy_colors(stage, tmp.v[1], tmp.v[0]);
+
+ stage->next->line( stage->next, &tmp );
+}
+
+static void flatshade_line_1( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct prim_header tmp;
+
+ tmp.v[0] = dup_vert(stage, header->v[0], 0);
+ tmp.v[1] = header->v[1];
+
+ copy_colors(stage, tmp.v[0], tmp.v[1]);
+
+ stage->next->line( stage->next, &tmp );
+}
+
+
+
+
+static void flatshade_init_state( struct draw_stage *stage )
+{
+ struct flat_stage *flat = flat_stage(stage);
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
+ uint i;
+
+ /* Find which vertex shader outputs are colors, make a list */
+ flat->num_color_attribs = 0;
+ flat->num_spec_attribs = 0;
+ for (i = 0; i < vs->info.num_outputs; i++) {
+ if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR ||
+ vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
+ if (vs->info.output_semantic_index[i] == 0)
+ flat->color_attribs[flat->num_color_attribs++] = i;
+ else
+ flat->spec_attribs[flat->num_spec_attribs++] = i;
+ }
+ }
+
+ /* Choose flatshade routine according to provoking vertex:
+ */
+ if (stage->draw->rasterizer->flatshade_first) {
+ stage->line = flatshade_line_0;
+ stage->tri = flatshade_tri_0;
+ }
+ else {
+ stage->line = flatshade_line_1;
+ stage->tri = flatshade_tri_2;
+ }
+}
+
+static void flatshade_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ flatshade_init_state( stage );
+ stage->tri( stage, header );
+}
+
+static void flatshade_first_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ flatshade_init_state( stage );
+ stage->line( stage, header );
+}
+
+
+static void flatshade_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ stage->tri = flatshade_first_tri;
+ stage->line = flatshade_first_line;
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void flatshade_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void flatshade_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create flatshading drawing stage.
+ */
+struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
+{
+ struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage);
+ if (flatshade == NULL)
+ goto fail;
+
+ if (!draw_alloc_temp_verts( &flatshade->stage, 2 ))
+ goto fail;
+
+ flatshade->stage.draw = draw;
+ flatshade->stage.name = "flatshade";
+ flatshade->stage.next = NULL;
+ flatshade->stage.point = draw_pipe_passthrough_point;
+ flatshade->stage.line = flatshade_first_line;
+ flatshade->stage.tri = flatshade_first_tri;
+ flatshade->stage.flush = flatshade_flush;
+ flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter;
+ flatshade->stage.destroy = flatshade_destroy;
+
+ return &flatshade->stage;
+
+ fail:
+ if (flatshade)
+ flatshade->stage.destroy( &flatshade->stage );
+
+ return NULL;
+}
+
+
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
new file mode 100644
index 0000000000..40798a5d6e
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -0,0 +1,185 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief polygon offset state
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Brian Paul
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "draw_pipe.h"
+
+
+
+struct offset_stage {
+ struct draw_stage stage;
+
+ float scale;
+ float units;
+};
+
+
+
+static INLINE struct offset_stage *offset_stage( struct draw_stage *stage )
+{
+ return (struct offset_stage *) stage;
+}
+
+
+
+
+
+/**
+ * Offset tri Z. Some hardware can handle this, but not usually when
+ * doing unfilled rendering.
+ */
+static void do_offset_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ const unsigned pos = stage->draw->vs.position_output;
+ struct offset_stage *offset = offset_stage(stage);
+ float inv_det = 1.0f / header->det;
+
+ /* Window coords:
+ */
+ float *v0 = header->v[0]->data[pos];
+ float *v1 = header->v[1]->data[pos];
+ float *v2 = header->v[2]->data[pos];
+
+ /* edge vectors e = v0 - v2, f = v1 - v2 */
+ float ex = v0[0] - v2[0];
+ float ey = v0[1] - v2[1];
+ float ez = v0[2] - v2[2];
+ float fx = v1[0] - v2[0];
+ float fy = v1[1] - v2[1];
+ float fz = v1[2] - v2[2];
+
+ /* (a,b) = cross(e,f).xy */
+ float a = ey*fz - ez*fy;
+ float b = ez*fx - ex*fz;
+
+ float dzdx = fabsf(a * inv_det);
+ float dzdy = fabsf(b * inv_det);
+
+ float zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale;
+
+ /*
+ * Note: we're applying the offset and clamping per-vertex.
+ * Ideally, the offset is applied per-fragment prior to fragment shading.
+ */
+ v0[2] = CLAMP(v0[2] + zoffset, 0.0f, 1.0f);
+ v1[2] = CLAMP(v1[2] + zoffset, 0.0f, 1.0f);
+ v2[2] = CLAMP(v2[2] + zoffset, 0.0f, 1.0f);
+
+ stage->next->tri( stage->next, header );
+}
+
+
+static void offset_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct prim_header tmp;
+
+ tmp.det = header->det;
+ tmp.flags = header->flags;
+ tmp.pad = header->pad;
+ tmp.v[0] = dup_vert(stage, header->v[0], 0);
+ tmp.v[1] = dup_vert(stage, header->v[1], 1);
+ tmp.v[2] = dup_vert(stage, header->v[2], 2);
+
+ do_offset_tri( stage, &tmp );
+}
+
+
+static void offset_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct offset_stage *offset = offset_stage(stage);
+
+ offset->units = (float) (stage->draw->rasterizer->offset_units * stage->draw->mrd);
+ offset->scale = stage->draw->rasterizer->offset_scale;
+
+ stage->tri = offset_tri;
+ stage->tri( stage, header );
+}
+
+
+
+
+static void offset_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ stage->tri = offset_first_tri;
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void offset_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void offset_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create polygon offset drawing stage.
+ */
+struct draw_stage *draw_offset_stage( struct draw_context *draw )
+{
+ struct offset_stage *offset = CALLOC_STRUCT(offset_stage);
+ if (offset == NULL)
+ goto fail;
+
+ draw_alloc_temp_verts( &offset->stage, 3 );
+
+ offset->stage.draw = draw;
+ offset->stage.name = "offset";
+ offset->stage.next = NULL;
+ offset->stage.point = draw_pipe_passthrough_point;
+ offset->stage.line = draw_pipe_passthrough_line;
+ offset->stage.tri = offset_first_tri;
+ offset->stage.flush = offset_flush;
+ offset->stage.reset_stipple_counter = offset_reset_stipple_counter;
+ offset->stage.destroy = offset_destroy;
+
+ return &offset->stage;
+
+ fail:
+ if (offset)
+ offset->stage.destroy( &offset->stage );
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
new file mode 100644
index 0000000000..283502cdf3
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -0,0 +1,777 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Polygon stipple stage: implement polygon stipple with texture map and
+ * fragment program. The fragment program samples the texture and does
+ * a fragment kill for the stipple-failing fragments.
+ *
+ * Authors: Brian Paul
+ */
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "draw_context.h"
+#include "draw_pipe.h"
+
+
+
+/**
+ * Subclass of pipe_shader_state to carry extra fragment shader info.
+ */
+struct pstip_fragment_shader
+{
+ struct pipe_shader_state state;
+ void *driver_fs;
+ void *pstip_fs;
+ uint sampler_unit;
+};
+
+
+/**
+ * Subclass of draw_stage
+ */
+struct pstip_stage
+{
+ struct draw_stage stage;
+
+ void *sampler_cso;
+ struct pipe_texture *texture;
+ uint num_samplers;
+ uint num_textures;
+
+ /*
+ * Currently bound state
+ */
+ struct pstip_fragment_shader *fs;
+ struct {
+ void *samplers[PIPE_MAX_SAMPLERS];
+ struct pipe_texture *textures[PIPE_MAX_SAMPLERS];
+ const struct pipe_poly_stipple *stipple;
+ } state;
+
+ /*
+ * Driver interface/override functions
+ */
+ void * (*driver_create_fs_state)(struct pipe_context *,
+ const struct pipe_shader_state *);
+ void (*driver_bind_fs_state)(struct pipe_context *, void *);
+ void (*driver_delete_fs_state)(struct pipe_context *, void *);
+
+ void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **);
+
+ void (*driver_set_sampler_textures)(struct pipe_context *, unsigned,
+ struct pipe_texture **);
+
+ void (*driver_set_polygon_stipple)(struct pipe_context *,
+ const struct pipe_poly_stipple *);
+
+ struct pipe_context *pipe;
+};
+
+
+
+/**
+ * Subclass of tgsi_transform_context, used for transforming the
+ * user's fragment shader to add the special AA instructions.
+ */
+struct pstip_transform_context {
+ struct tgsi_transform_context base;
+ uint tempsUsed; /**< bitmask */
+ int wincoordInput;
+ int maxInput;
+ uint samplersUsed; /**< bitfield of samplers used */
+ int freeSampler; /** an available sampler for the pstipple */
+ int texTemp; /**< temp registers */
+ int numImmed;
+ boolean firstInstruction;
+};
+
+
+/**
+ * TGSI declaration transform callback.
+ * Look for a free sampler, a free input attrib, and two free temp regs.
+ */
+static void
+pstip_transform_decl(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *decl)
+{
+ struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
+
+ if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
+ uint i;
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last; i++) {
+ pctx->samplersUsed |= 1 << i;
+ }
+ }
+ else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ pctx->maxInput = MAX2(pctx->maxInput, (int) decl->DeclarationRange.Last);
+ if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION)
+ pctx->wincoordInput = (int) decl->DeclarationRange.First;
+ }
+ else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+ uint i;
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last; i++) {
+ pctx->tempsUsed |= (1 << i);
+ }
+ }
+
+ ctx->emit_declaration(ctx, decl);
+}
+
+
+static void
+pstip_transform_immed(struct tgsi_transform_context *ctx,
+ struct tgsi_full_immediate *immed)
+{
+ struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
+ pctx->numImmed++;
+}
+
+
+/**
+ * Find the lowest zero bit in the given word, or -1 if bitfield is all ones.
+ */
+static int
+free_bit(uint bitfield)
+{
+ int i;
+ for (i = 0; i < 32; i++) {
+ if ((bitfield & (1 << i)) == 0)
+ return i;
+ }
+ return -1;
+}
+
+
+/**
+ * TGSI instruction transform callback.
+ * Replace writes to result.color w/ a temp reg.
+ * Upon END instruction, insert texture sampling code for antialiasing.
+ */
+static void
+pstip_transform_inst(struct tgsi_transform_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
+
+ if (pctx->firstInstruction) {
+ /* emit our new declarations before the first instruction */
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction newInst;
+ uint i;
+ int wincoordInput;
+
+ /* find free sampler */
+ pctx->freeSampler = free_bit(pctx->samplersUsed);
+ if (pctx->freeSampler >= PIPE_MAX_SAMPLERS)
+ pctx->freeSampler = PIPE_MAX_SAMPLERS - 1;
+
+ if (pctx->wincoordInput < 0)
+ wincoordInput = pctx->maxInput + 1;
+ else
+ wincoordInput = pctx->wincoordInput;
+
+ /* find one free temp reg */
+ for (i = 0; i < 32; i++) {
+ if ((pctx->tempsUsed & (1 << i)) == 0) {
+ /* found a free temp */
+ if (pctx->texTemp < 0)
+ pctx->texTemp = i;
+ else
+ break;
+ }
+ }
+ assert(pctx->texTemp >= 0);
+
+ if (pctx->wincoordInput < 0) {
+ /* declare new position input reg */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION;
+ decl.Semantic.SemanticIndex = 0;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = wincoordInput;
+ ctx->emit_declaration(ctx, &decl);
+ }
+
+ /* declare new sampler */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_SAMPLER;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = pctx->freeSampler;
+ ctx->emit_declaration(ctx, &decl);
+
+ /* declare new temp regs */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = pctx->texTemp;
+ ctx->emit_declaration(ctx, &decl);
+
+ /* emit immediate = {1/32, 1/32, 1, 1}
+ * The index/position of this immediate will be pctx->numImmed
+ */
+ {
+ static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 };
+ struct tgsi_full_immediate immed;
+ uint size = 4;
+ immed = tgsi_default_full_immediate();
+ immed.Immediate.NrTokens = 1 + size; /* one for the token itself */
+ immed.u[0].Float = value[0];
+ immed.u[1].Float = value[1];
+ immed.u[2].Float = value[2];
+ immed.u[3].Float = value[3];
+ ctx->emit_immediate(ctx, &immed);
+ }
+
+ pctx->firstInstruction = FALSE;
+
+
+ /*
+ * Insert new MUL/TEX/KILP instructions at start of program
+ * Take gl_FragCoord, divide by 32 (stipple size), sample the
+ * texture and kill fragment if needed.
+ *
+ * We'd like to use non-normalized texcoords to index into a RECT
+ * texture, but we can only use GL_REPEAT wrap mode with normalized
+ * texcoords. Darn.
+ */
+
+ /* MUL texTemp, INPUT[wincoord], 1/32; */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* TEX texTemp, texTemp, sampler; */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_TEX;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->freeSampler;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* KIL -texTemp; # if -texTemp < 0, KILL fragment */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
+ newInst.Instruction.NumDstRegs = 0;
+ newInst.Instruction.NumSrcRegs = 1;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp;
+ newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
+ ctx->emit_instruction(ctx, &newInst);
+ }
+
+ /* emit this instruction */
+ ctx->emit_instruction(ctx, inst);
+}
+
+
+/**
+ * Generate the frag shader we'll use for doing polygon stipple.
+ * This will be the user's shader prefixed with a TEX and KIL instruction.
+ */
+static boolean
+generate_pstip_fs(struct pstip_stage *pstip)
+{
+ const struct pipe_shader_state *orig_fs = &pstip->fs->state;
+ /*struct draw_context *draw = pstip->stage.draw;*/
+ struct pipe_shader_state pstip_fs;
+ struct pstip_transform_context transform;
+
+#define MAX 1000
+
+ pstip_fs = *orig_fs; /* copy to init */
+ pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ if (pstip_fs.tokens == NULL)
+ return FALSE;
+
+ memset(&transform, 0, sizeof(transform));
+ transform.wincoordInput = -1;
+ transform.maxInput = -1;
+ transform.texTemp = -1;
+ transform.firstInstruction = TRUE;
+ transform.base.transform_instruction = pstip_transform_inst;
+ transform.base.transform_declaration = pstip_transform_decl;
+ transform.base.transform_immediate = pstip_transform_immed;
+
+ tgsi_transform_shader(orig_fs->tokens,
+ (struct tgsi_token *) pstip_fs.tokens,
+ MAX, &transform.base);
+
+#if 0 /* DEBUG */
+ tgsi_dump(orig_fs->tokens, 0);
+ tgsi_dump(pstip_fs.tokens, 0);
+#endif
+
+ pstip->fs->sampler_unit = transform.freeSampler;
+ assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS);
+
+ pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs);
+
+ FREE((void *)pstip_fs.tokens);
+ return TRUE;
+}
+
+
+/**
+ * Load texture image with current stipple pattern.
+ */
+static void
+pstip_update_texture(struct pstip_stage *pstip)
+{
+ static const uint bit31 = 1 << 31;
+ struct pipe_context *pipe = pstip->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_transfer *transfer;
+ const uint *stipple = pstip->state.stipple->stipple;
+ uint i, j;
+ ubyte *data;
+
+ /* XXX: want to avoid flushing just because we use stipple:
+ */
+ pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL );
+
+ transfer = screen->get_tex_transfer(screen, pstip->texture, 0, 0, 0,
+ PIPE_TRANSFER_WRITE, 0, 0, 32, 32);
+ data = screen->transfer_map(screen, transfer);
+
+ /*
+ * Load alpha texture.
+ * Note: 0 means keep the fragment, 255 means kill it.
+ * We'll negate the texel value and use KILP which kills if value
+ * is negative.
+ */
+ for (i = 0; i < 32; i++) {
+ for (j = 0; j < 32; j++) {
+ if (stipple[i] & (bit31 >> j)) {
+ /* fragment "on" */
+ data[i * transfer->stride + j] = 0;
+ }
+ else {
+ /* fragment "off" */
+ data[i * transfer->stride + j] = 255;
+ }
+ }
+ }
+
+ /* unmap */
+ screen->transfer_unmap(screen, transfer);
+ screen->tex_transfer_destroy(transfer);
+}
+
+
+/**
+ * Create the texture map we'll use for stippling.
+ */
+static boolean
+pstip_create_texture(struct pstip_stage *pstip)
+{
+ struct pipe_context *pipe = pstip->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_texture texTemp;
+
+ memset(&texTemp, 0, sizeof(texTemp));
+ texTemp.target = PIPE_TEXTURE_2D;
+ texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
+ texTemp.last_level = 0;
+ texTemp.width[0] = 32;
+ texTemp.height[0] = 32;
+ texTemp.depth[0] = 1;
+ pf_get_block(texTemp.format, &texTemp.block);
+
+ pstip->texture = screen->texture_create(screen, &texTemp);
+ if (pstip->texture == NULL)
+ return FALSE;
+
+ return TRUE;
+}
+
+
+/**
+ * Create the sampler CSO that'll be used for stippling.
+ */
+static boolean
+pstip_create_sampler(struct pstip_stage *pstip)
+{
+ struct pipe_sampler_state sampler;
+ struct pipe_context *pipe = pstip->pipe;
+
+ memset(&sampler, 0, sizeof(sampler));
+ sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
+ sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
+ sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
+ sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+ sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+ sampler.normalized_coords = 1;
+ sampler.min_lod = 0.0f;
+ sampler.max_lod = 0.0f;
+
+ pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
+ if (pstip->sampler_cso == NULL)
+ return FALSE;
+
+ return TRUE;
+}
+
+
+/**
+ * When we're about to draw our first stipple polygon in a batch, this function
+ * is called to tell the driver to bind our modified fragment shader.
+ */
+static boolean
+bind_pstip_fragment_shader(struct pstip_stage *pstip)
+{
+ struct draw_context *draw = pstip->stage.draw;
+ if (!pstip->fs->pstip_fs &&
+ !generate_pstip_fs(pstip))
+ return FALSE;
+
+ draw->suspend_flushing = TRUE;
+ pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs);
+ draw->suspend_flushing = FALSE;
+ return TRUE;
+}
+
+
+static INLINE struct pstip_stage *
+pstip_stage( struct draw_stage *stage )
+{
+ return (struct pstip_stage *) stage;
+}
+
+
+static void
+pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
+{
+ struct pstip_stage *pstip = pstip_stage(stage);
+ struct pipe_context *pipe = pstip->pipe;
+ struct draw_context *draw = stage->draw;
+ uint num_samplers;
+
+ assert(stage->draw->rasterizer->poly_stipple_enable);
+
+ /* bind our fragprog */
+ if (!bind_pstip_fragment_shader(pstip)) {
+ stage->tri = draw_pipe_passthrough_tri;
+ stage->tri(stage, header);
+ return;
+ }
+
+
+ /* how many samplers? */
+ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
+ num_samplers = MAX2(pstip->num_textures, pstip->num_samplers);
+ num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1);
+
+ /* plug in our sampler, texture */
+ pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso;
+ pipe_texture_reference(&pstip->state.textures[pstip->fs->sampler_unit],
+ pstip->texture);
+
+ assert(num_samplers <= PIPE_MAX_SAMPLERS);
+
+ draw->suspend_flushing = TRUE;
+ pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers);
+ pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures);
+ draw->suspend_flushing = FALSE;
+
+ /* now really draw first triangle */
+ stage->tri = draw_pipe_passthrough_tri;
+ stage->tri(stage, header);
+}
+
+
+static void
+pstip_flush(struct draw_stage *stage, unsigned flags)
+{
+ struct draw_context *draw = stage->draw;
+ struct pstip_stage *pstip = pstip_stage(stage);
+ struct pipe_context *pipe = pstip->pipe;
+
+ stage->tri = pstip_first_tri;
+ stage->next->flush( stage->next, flags );
+
+ /* restore original frag shader, texture, sampler state */
+ draw->suspend_flushing = TRUE;
+ pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs);
+ pstip->driver_bind_sampler_states(pipe, pstip->num_samplers,
+ pstip->state.samplers);
+ pstip->driver_set_sampler_textures(pipe, pstip->num_textures,
+ pstip->state.textures);
+ draw->suspend_flushing = FALSE;
+}
+
+
+static void
+pstip_reset_stipple_counter(struct draw_stage *stage)
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void
+pstip_destroy(struct draw_stage *stage)
+{
+ struct pstip_stage *pstip = pstip_stage(stage);
+ uint i;
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ pipe_texture_reference(&pstip->state.textures[i], NULL);
+ }
+
+ pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso);
+
+ pipe_texture_reference(&pstip->texture, NULL);
+
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+static struct pstip_stage *
+draw_pstip_stage(struct draw_context *draw)
+{
+ struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage);
+
+ draw_alloc_temp_verts( &pstip->stage, 8 );
+
+ pstip->stage.draw = draw;
+ pstip->stage.name = "pstip";
+ pstip->stage.next = NULL;
+ pstip->stage.point = draw_pipe_passthrough_point;
+ pstip->stage.line = draw_pipe_passthrough_line;
+ pstip->stage.tri = pstip_first_tri;
+ pstip->stage.flush = pstip_flush;
+ pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter;
+ pstip->stage.destroy = pstip_destroy;
+
+ return pstip;
+}
+
+
+static struct pstip_stage *
+pstip_stage_from_pipe(struct pipe_context *pipe)
+{
+ struct draw_context *draw = (struct draw_context *) pipe->draw;
+ return pstip_stage(draw->pipeline.pstipple);
+}
+
+
+/**
+ * This function overrides the driver's create_fs_state() function and
+ * will typically be called by the state tracker.
+ */
+static void *
+pstip_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *fs)
+{
+ struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
+ struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader);
+
+ if (aafs) {
+ aafs->state = *fs;
+
+ /* pass-through */
+ aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs);
+ }
+
+ return aafs;
+}
+
+
+static void
+pstip_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
+ struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs;
+ /* save current */
+ pstip->fs = aafs;
+ /* pass-through */
+ pstip->driver_bind_fs_state(pstip->pipe,
+ (aafs ? aafs->driver_fs : NULL));
+}
+
+
+static void
+pstip_delete_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
+ struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs;
+ /* pass-through */
+ pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs);
+
+ if (aafs->pstip_fs)
+ pstip->driver_delete_fs_state(pstip->pipe, aafs->pstip_fs);
+
+ FREE(aafs);
+}
+
+
+static void
+pstip_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
+ uint i;
+
+ /* save current */
+ memcpy(pstip->state.samplers, sampler, num * sizeof(void *));
+ for (i = num; i < PIPE_MAX_SAMPLERS; i++) {
+ pstip->state.samplers[i] = NULL;
+ }
+
+ pstip->num_samplers = num;
+ /* pass-through */
+ pstip->driver_bind_sampler_states(pstip->pipe, num, sampler);
+}
+
+
+static void
+pstip_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num, struct pipe_texture **texture)
+{
+ struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
+ uint i;
+
+ /* save current */
+ for (i = 0; i < num; i++) {
+ pipe_texture_reference(&pstip->state.textures[i], texture[i]);
+ }
+ for (; i < PIPE_MAX_SAMPLERS; i++) {
+ pipe_texture_reference(&pstip->state.textures[i], NULL);
+ }
+
+ pstip->num_textures = num;
+
+ /* pass-through */
+ pstip->driver_set_sampler_textures(pstip->pipe, num, texture);
+}
+
+
+static void
+pstip_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
+
+ /* save current */
+ pstip->state.stipple = stipple;
+
+ /* pass-through */
+ pstip->driver_set_polygon_stipple(pstip->pipe, stipple);
+
+ pstip_update_texture(pstip);
+}
+
+
+/**
+ * Called by drivers that want to install this polygon stipple stage
+ * into the draw module's pipeline. This will not be used if the
+ * hardware has native support for polygon stipple.
+ */
+boolean
+draw_install_pstipple_stage(struct draw_context *draw,
+ struct pipe_context *pipe)
+{
+ struct pstip_stage *pstip;
+
+ pipe->draw = (void *) draw;
+
+ /*
+ * Create / install pgon stipple drawing / prim stage
+ */
+ pstip = draw_pstip_stage( draw );
+ if (pstip == NULL)
+ goto fail;
+
+ draw->pipeline.pstipple = &pstip->stage;
+
+ pstip->pipe = pipe;
+
+ /* create special texture, sampler state */
+ if (!pstip_create_texture(pstip))
+ goto fail;
+
+ if (!pstip_create_sampler(pstip))
+ goto fail;
+
+ /* save original driver functions */
+ pstip->driver_create_fs_state = pipe->create_fs_state;
+ pstip->driver_bind_fs_state = pipe->bind_fs_state;
+ pstip->driver_delete_fs_state = pipe->delete_fs_state;
+
+ pstip->driver_bind_sampler_states = pipe->bind_sampler_states;
+ pstip->driver_set_sampler_textures = pipe->set_sampler_textures;
+ pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple;
+
+ /* override the driver's functions */
+ pipe->create_fs_state = pstip_create_fs_state;
+ pipe->bind_fs_state = pstip_bind_fs_state;
+ pipe->delete_fs_state = pstip_delete_fs_state;
+
+ pipe->bind_sampler_states = pstip_bind_sampler_states;
+ pipe->set_sampler_textures = pstip_set_sampler_textures;
+ pipe->set_polygon_stipple = pstip_set_polygon_stipple;
+
+ return TRUE;
+
+ fail:
+ if (pstip)
+ pstip->stage.destroy( &pstip->stage );
+
+ return FALSE;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
new file mode 100644
index 0000000000..6e921bac27
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -0,0 +1,251 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/* Implement line stipple by cutting lines up into smaller lines.
+ * There are hundreds of ways to implement line stipple, this is one
+ * choice that should work in all situations, requires no state
+ * manipulations, but with a penalty in terms of large amounts of
+ * generated geometry.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_pipe.h"
+
+
+/** Subclass of draw_stage */
+struct stipple_stage {
+ struct draw_stage stage;
+ float counter;
+ uint pattern;
+ uint factor;
+};
+
+
+static INLINE struct stipple_stage *
+stipple_stage(struct draw_stage *stage)
+{
+ return (struct stipple_stage *) stage;
+}
+
+
+/**
+ * Compute interpolated vertex attributes for 'dst' at position 't'
+ * between 'v0' and 'v1'.
+ * XXX using linear interpolation for all attribs at this time.
+ */
+static void
+screen_interp( struct draw_context *draw,
+ struct vertex_header *dst,
+ float t,
+ const struct vertex_header *v0,
+ const struct vertex_header *v1 )
+{
+ uint attr;
+ for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) {
+ const float *val0 = v0->data[attr];
+ const float *val1 = v1->data[attr];
+ float *newv = dst->data[attr];
+ uint i;
+ for (i = 0; i < 4; i++) {
+ newv[i] = val0[i] + t * (val1[i] - val0[i]);
+ }
+ }
+}
+
+
+static void
+emit_segment(struct draw_stage *stage, struct prim_header *header,
+ float t0, float t1)
+{
+ struct vertex_header *v0new = dup_vert(stage, header->v[0], 0);
+ struct vertex_header *v1new = dup_vert(stage, header->v[1], 1);
+ struct prim_header newprim = *header;
+
+ if (t0 > 0.0) {
+ screen_interp( stage->draw, v0new, t0, header->v[0], header->v[1] );
+ newprim.v[0] = v0new;
+ }
+
+ if (t1 < 1.0) {
+ screen_interp( stage->draw, v1new, t1, header->v[0], header->v[1] );
+ newprim.v[1] = v1new;
+ }
+
+ stage->next->line( stage->next, &newprim );
+}
+
+
+static INLINE unsigned
+stipple_test(int counter, ushort pattern, int factor)
+{
+ int b = (counter / factor) & 0xf;
+ return (1 << b) & pattern;
+}
+
+
+static void
+stipple_line(struct draw_stage *stage, struct prim_header *header)
+{
+ struct stipple_stage *stipple = stipple_stage(stage);
+ struct vertex_header *v0 = header->v[0];
+ struct vertex_header *v1 = header->v[1];
+ const unsigned pos = stage->draw->vs.position_output;
+ const float *pos0 = v0->data[pos];
+ const float *pos1 = v1->data[pos];
+ float start = 0;
+ int state = 0;
+
+ float x0 = pos0[0];
+ float x1 = pos1[0];
+ float y0 = pos0[1];
+ float y1 = pos1[1];
+
+ float dx = x0 > x1 ? x0 - x1 : x1 - x0;
+ float dy = y0 > y1 ? y0 - y1 : y1 - y0;
+
+ float length = MAX2(dx, dy);
+ int i;
+
+ if (header->flags & DRAW_PIPE_RESET_STIPPLE)
+ stipple->counter = 0;
+
+
+ /* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table.
+ */
+ for (i = 0; i < length; i++) {
+ int result = stipple_test( (int) stipple->counter+i,
+ (ushort) stipple->pattern, stipple->factor );
+ if (result != state) {
+ /* changing from "off" to "on" or vice versa */
+ if (state) {
+ if (start != i) {
+ /* finishing an "on" segment */
+ emit_segment( stage, header, start / length, i / length );
+ }
+ }
+ else {
+ /* starting an "on" segment */
+ start = (float) i;
+ }
+ state = result;
+ }
+ }
+
+ if (state && start < length)
+ emit_segment( stage, header, start / length, 1.0 );
+
+ stipple->counter += length;
+}
+
+
+static void
+reset_stipple_counter(struct draw_stage *stage)
+{
+ struct stipple_stage *stipple = stipple_stage(stage);
+ stipple->counter = 0;
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+static void
+stipple_reset_point(struct draw_stage *stage, struct prim_header *header)
+{
+ struct stipple_stage *stipple = stipple_stage(stage);
+ stipple->counter = 0;
+ stage->next->point(stage->next, header);
+}
+
+static void
+stipple_reset_tri(struct draw_stage *stage, struct prim_header *header)
+{
+ struct stipple_stage *stipple = stipple_stage(stage);
+ stipple->counter = 0;
+ stage->next->tri(stage->next, header);
+}
+
+
+static void
+stipple_first_line(struct draw_stage *stage,
+ struct prim_header *header)
+{
+ struct stipple_stage *stipple = stipple_stage(stage);
+ struct draw_context *draw = stage->draw;
+
+ stipple->pattern = draw->rasterizer->line_stipple_pattern;
+ stipple->factor = draw->rasterizer->line_stipple_factor + 1;
+
+ stage->line = stipple_line;
+ stage->line( stage, header );
+}
+
+
+static void
+stipple_flush(struct draw_stage *stage, unsigned flags)
+{
+ stage->line = stipple_first_line;
+ stage->next->flush( stage->next, flags );
+}
+
+
+
+
+static void
+stipple_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create line stippler stage
+ */
+struct draw_stage *draw_stipple_stage( struct draw_context *draw )
+{
+ struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage);
+
+ draw_alloc_temp_verts( &stipple->stage, 2 );
+
+ stipple->stage.draw = draw;
+ stipple->stage.name = "stipple";
+ stipple->stage.next = NULL;
+ stipple->stage.point = stipple_reset_point;
+ stipple->stage.line = stipple_first_line;
+ stipple->stage.tri = stipple_reset_tri;
+ stipple->stage.reset_stipple_counter = reset_stipple_counter;
+ stipple->stage.flush = stipple_flush;
+ stipple->stage.destroy = stipple_destroy;
+
+ return &stipple->stage;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
new file mode 100644
index 0000000000..eef0238b15
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -0,0 +1,200 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
+
+struct twoside_stage {
+ struct draw_stage stage;
+ float sign; /**< +1 or -1 */
+ uint attrib_front0, attrib_back0;
+ uint attrib_front1, attrib_back1;
+};
+
+
+static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage )
+{
+ return (struct twoside_stage *)stage;
+}
+
+
+
+
+/**
+ * Copy back color(s) to front color(s).
+ */
+static INLINE struct vertex_header *
+copy_bfc( struct twoside_stage *twoside,
+ const struct vertex_header *v,
+ unsigned idx )
+{
+ struct vertex_header *tmp = dup_vert( &twoside->stage, v, idx );
+
+ if (twoside->attrib_back0) {
+ COPY_4FV(tmp->data[twoside->attrib_front0],
+ tmp->data[twoside->attrib_back0]);
+ }
+ if (twoside->attrib_back1) {
+ COPY_4FV(tmp->data[twoside->attrib_front1],
+ tmp->data[twoside->attrib_back1]);
+ }
+
+ return tmp;
+}
+
+
+/* Twoside tri:
+ */
+static void twoside_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct twoside_stage *twoside = twoside_stage(stage);
+
+ if (header->det * twoside->sign < 0.0) {
+ /* this is a back-facing triangle */
+ struct prim_header tmp;
+
+ tmp.det = header->det;
+ tmp.flags = header->flags;
+ tmp.pad = header->pad;
+ /* copy back attribs to front attribs */
+ tmp.v[0] = copy_bfc(twoside, header->v[0], 0);
+ tmp.v[1] = copy_bfc(twoside, header->v[1], 1);
+ tmp.v[2] = copy_bfc(twoside, header->v[2], 2);
+
+ stage->next->tri( stage->next, &tmp );
+ }
+ else {
+ stage->next->tri( stage->next, header );
+ }
+}
+
+
+
+static void twoside_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct twoside_stage *twoside = twoside_stage(stage);
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
+ uint i;
+
+ twoside->attrib_front0 = 0;
+ twoside->attrib_front1 = 0;
+ twoside->attrib_back0 = 0;
+ twoside->attrib_back1 = 0;
+
+ /* Find which vertex shader outputs are front/back colors */
+ for (i = 0; i < vs->info.num_outputs; i++) {
+ if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ if (vs->info.output_semantic_index[i] == 0)
+ twoside->attrib_front0 = i;
+ else
+ twoside->attrib_front1 = i;
+ }
+ if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
+ if (vs->info.output_semantic_index[i] == 0)
+ twoside->attrib_back0 = i;
+ else
+ twoside->attrib_back1 = i;
+ }
+ }
+
+ if (!twoside->attrib_back0)
+ twoside->attrib_front0 = 0;
+
+ if (!twoside->attrib_back1)
+ twoside->attrib_front1 = 0;
+
+ /*
+ * We'll multiply the primitive's determinant by this sign to determine
+ * if the triangle is back-facing (negative).
+ * sign = -1 for CCW, +1 for CW
+ */
+ twoside->sign = (stage->draw->rasterizer->front_winding == PIPE_WINDING_CCW) ? -1.0f : 1.0f;
+
+ stage->tri = twoside_tri;
+ stage->tri( stage, header );
+}
+
+
+static void twoside_flush( struct draw_stage *stage, unsigned flags )
+{
+ stage->tri = twoside_first_tri;
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void twoside_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void twoside_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create twoside pipeline stage.
+ */
+struct draw_stage *draw_twoside_stage( struct draw_context *draw )
+{
+ struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage);
+ if (twoside == NULL)
+ goto fail;
+
+ if (!draw_alloc_temp_verts( &twoside->stage, 3 ))
+ goto fail;
+
+ twoside->stage.draw = draw;
+ twoside->stage.name = "twoside";
+ twoside->stage.next = NULL;
+ twoside->stage.point = draw_pipe_passthrough_point;
+ twoside->stage.line = draw_pipe_passthrough_line;
+ twoside->stage.tri = twoside_first_tri;
+ twoside->stage.flush = twoside_flush;
+ twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter;
+ twoside->stage.destroy = twoside_destroy;
+
+ return &twoside->stage;
+
+ fail:
+ if (twoside)
+ twoside->stage.destroy( &twoside->stage );
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
new file mode 100644
index 0000000000..03bb842e20
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -0,0 +1,204 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Drawing stage for handling glPolygonMode(line/point).
+ * Convert triangles to points or lines as needed.
+ */
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "pipe/p_defines.h"
+#include "draw_private.h"
+#include "draw_pipe.h"
+
+
+struct unfilled_stage {
+ struct draw_stage stage;
+
+ /** [0] = front face, [1] = back face.
+ * legal values: PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE,
+ * and PIPE_POLYGON_MODE_POINT,
+ */
+ unsigned mode[2];
+};
+
+
+static INLINE struct unfilled_stage *unfilled_stage( struct draw_stage *stage )
+{
+ return (struct unfilled_stage *)stage;
+}
+
+
+
+static void point( struct draw_stage *stage,
+ struct vertex_header *v0 )
+{
+ struct prim_header tmp;
+ tmp.v[0] = v0;
+ stage->next->point( stage->next, &tmp );
+}
+
+static void line( struct draw_stage *stage,
+ struct vertex_header *v0,
+ struct vertex_header *v1 )
+{
+ struct prim_header tmp;
+ tmp.v[0] = v0;
+ tmp.v[1] = v1;
+ stage->next->line( stage->next, &tmp );
+}
+
+
+static void points( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct vertex_header *v0 = header->v[0];
+ struct vertex_header *v1 = header->v[1];
+ struct vertex_header *v2 = header->v[2];
+
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, v0 );
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, v1 );
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, v2 );
+}
+
+
+static void lines( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct vertex_header *v0 = header->v[0];
+ struct vertex_header *v1 = header->v[1];
+ struct vertex_header *v2 = header->v[2];
+
+ if (header->flags & DRAW_PIPE_RESET_STIPPLE)
+ stage->next->reset_stipple_counter( stage->next );
+
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, v2, v0 );
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, v0, v1 );
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, v1, v2 );
+}
+
+
+/* Unfilled tri:
+ *
+ * Note edgeflags in the vertex struct is not sufficient as we will
+ * need to manipulate them when decomposing primitives.
+ *
+ * We currently keep the vertex edgeflag and primitive edgeflag mask
+ * separate until the last possible moment.
+ */
+static void unfilled_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct unfilled_stage *unfilled = unfilled_stage(stage);
+ unsigned mode = unfilled->mode[header->det >= 0.0];
+
+ switch (mode) {
+ case PIPE_POLYGON_MODE_FILL:
+ stage->next->tri( stage->next, header );
+ break;
+ case PIPE_POLYGON_MODE_LINE:
+ lines( stage, header );
+ break;
+ case PIPE_POLYGON_MODE_POINT:
+ points( stage, header );
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+static void unfilled_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct unfilled_stage *unfilled = unfilled_stage(stage);
+
+ unfilled->mode[0] = stage->draw->rasterizer->fill_ccw; /* front */
+ unfilled->mode[1] = stage->draw->rasterizer->fill_cw; /* back */
+
+ stage->tri = unfilled_tri;
+ stage->tri( stage, header );
+}
+
+
+
+static void unfilled_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ stage->next->flush( stage->next, flags );
+
+ stage->tri = unfilled_first_tri;
+}
+
+
+static void unfilled_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void unfilled_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create unfilled triangle stage.
+ */
+struct draw_stage *draw_unfilled_stage( struct draw_context *draw )
+{
+ struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage);
+ if (unfilled == NULL)
+ goto fail;
+
+ if (!draw_alloc_temp_verts( &unfilled->stage, 0 ))
+ goto fail;
+
+ unfilled->stage.draw = draw;
+ unfilled->stage.name = "unfilled";
+ unfilled->stage.next = NULL;
+ unfilled->stage.tmp = NULL;
+ unfilled->stage.point = draw_pipe_passthrough_point;
+ unfilled->stage.line = draw_pipe_passthrough_line;
+ unfilled->stage.tri = unfilled_first_tri;
+ unfilled->stage.flush = unfilled_flush;
+ unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter;
+ unfilled->stage.destroy = unfilled_destroy;
+
+ return &unfilled->stage;
+
+ fail:
+ if (unfilled)
+ unfilled->stage.destroy( &unfilled->stage );
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c
new file mode 100644
index 0000000000..e22e5fed0c
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_util.c
@@ -0,0 +1,137 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pipe.h"
+
+
+
+void
+draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->point(stage->next, header);
+}
+
+void
+draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->line(stage->next, header);
+}
+
+void
+draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->tri(stage->next, header);
+}
+
+
+
+
+
+/* This is only used for temporary verts.
+ */
+#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
+
+
+/**
+ * Allocate space for temporary post-transform vertices, such as for clipping.
+ */
+boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr )
+{
+ assert(!stage->tmp);
+
+ stage->tmp = NULL;
+ stage->nr_tmps = nr;
+
+ if (nr != 0)
+ {
+ unsigned i;
+ ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr );
+
+ if (store == NULL)
+ return FALSE;
+
+ stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr );
+ if (stage->tmp == NULL) {
+ FREE(store);
+ return FALSE;
+ }
+
+ for (i = 0; i < nr; i++)
+ stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE);
+ }
+
+ return TRUE;
+}
+
+
+void draw_free_temp_verts( struct draw_stage *stage )
+{
+ if (stage->tmp) {
+ FREE( stage->tmp[0] );
+ FREE( stage->tmp );
+ stage->tmp = NULL;
+ }
+}
+
+
+/* Reset vertex ids. This is basically a type of flush.
+ *
+ * Called only from draw_pipe_vbuf.c
+ */
+void draw_reset_vertex_ids(struct draw_context *draw)
+{
+ struct draw_stage *stage = draw->pipeline.first;
+
+ while (stage) {
+ unsigned i;
+
+ for (i = 0; i < stage->nr_tmps; i++)
+ stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID;
+
+ stage = stage->next;
+ }
+
+ if (draw->pipeline.verts)
+ {
+ unsigned i;
+ char *verts = draw->pipeline.verts;
+ unsigned stride = draw->pipeline.vertex_stride;
+
+ for (i = 0; i < draw->pipeline.vertex_count; i++) {
+ ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID;
+ verts += stride;
+ }
+ }
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
new file mode 100644
index 0000000000..bea90e50d3
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -0,0 +1,339 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "pipe/p_defines.h"
+#include "draw_private.h"
+#include "draw_pipe.h"
+#include "draw_context.h"
+#include "draw_vbuf.h"
+
+static boolean points( unsigned prim )
+{
+ return (prim == PIPE_PRIM_POINTS);
+}
+
+static boolean lines( unsigned prim )
+{
+ return (prim == PIPE_PRIM_LINES ||
+ prim == PIPE_PRIM_LINE_STRIP ||
+ prim == PIPE_PRIM_LINE_LOOP);
+}
+
+static boolean triangles( unsigned prim )
+{
+ return prim >= PIPE_PRIM_TRIANGLES;
+}
+
+/**
+ * Default version of a function to check if we need any special
+ * pipeline stages, or whether prims/verts can go through untouched.
+ * Don't test for bypass clipping or vs modes, this function is just
+ * about the primitive pipeline stages.
+ *
+ * This can be overridden by the driver.
+ */
+boolean
+draw_need_pipeline(const struct draw_context *draw,
+ const struct pipe_rasterizer_state *rasterizer,
+ unsigned int prim )
+{
+ /* If the driver has overridden this, use that version:
+ */
+ if (draw->render &&
+ draw->render->need_pipeline)
+ {
+ return draw->render->need_pipeline( draw->render,
+ rasterizer,
+ prim );
+ }
+
+ /* Don't have to worry about triangles turning into lines/points
+ * and triggering the pipeline, because we have to trigger the
+ * pipeline *anyway* if unfilled mode is active.
+ */
+ if (lines(prim))
+ {
+ /* line stipple */
+ if (rasterizer->line_stipple_enable && draw->pipeline.line_stipple)
+ return TRUE;
+
+ /* wide lines */
+ if (rasterizer->line_width > draw->pipeline.wide_line_threshold)
+ return TRUE;
+
+ /* AA lines */
+ if (rasterizer->line_smooth && draw->pipeline.aaline)
+ return TRUE;
+ }
+
+ if (points(prim))
+ {
+ /* large points */
+ if (rasterizer->point_size > draw->pipeline.wide_point_threshold)
+ return TRUE;
+
+ /* AA points */
+ if (rasterizer->point_smooth && draw->pipeline.aapoint)
+ return TRUE;
+
+ /* point sprites */
+ if (rasterizer->point_sprite && draw->pipeline.point_sprite)
+ return TRUE;
+ }
+
+
+ if (triangles(prim))
+ {
+ /* polygon stipple */
+ if (rasterizer->poly_stipple_enable && draw->pipeline.pstipple)
+ return TRUE;
+
+ /* unfilled polygons */
+ if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL)
+ return TRUE;
+
+ /* polygon offset */
+ if (rasterizer->offset_cw || rasterizer->offset_ccw)
+ return TRUE;
+
+ /* two-side lighting */
+ if (rasterizer->light_twoside)
+ return TRUE;
+ }
+
+ /* polygon cull - this is difficult - hardware can cull just fine
+ * most of the time (though sometimes CULL_NEITHER is unsupported.
+ *
+ * Generally this isn't a reason to require the pipeline, though.
+ *
+ if (rasterizer->cull_mode)
+ return TRUE;
+ */
+
+ return FALSE;
+}
+
+
+
+/**
+ * Rebuild the rendering pipeline.
+ */
+static struct draw_stage *validate_pipeline( struct draw_stage *stage )
+{
+ struct draw_context *draw = stage->draw;
+ struct draw_stage *next = draw->pipeline.rasterize;
+ int need_det = 0;
+ int precalc_flat = 0;
+ boolean wide_lines, wide_points;
+
+ /* Set the validate's next stage to the rasterize stage, so that it
+ * can be found later if needed for flushing.
+ */
+ stage->next = next;
+
+ /* drawing wide lines? */
+ wide_lines = (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold
+ && !draw->rasterizer->line_smooth);
+
+ /* drawing large points? */
+ if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)
+ wide_points = TRUE;
+ else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint)
+ wide_points = FALSE;
+ else if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold)
+ wide_points = TRUE;
+ else
+ wide_points = FALSE;
+
+ /*
+ * NOTE: we build up the pipeline in end-to-start order.
+ *
+ * TODO: make the current primitive part of the state and build
+ * shorter pipelines for lines & points.
+ */
+
+ if (draw->rasterizer->line_smooth && draw->pipeline.aaline) {
+ draw->pipeline.aaline->next = next;
+ next = draw->pipeline.aaline;
+ }
+
+ if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) {
+ draw->pipeline.aapoint->next = next;
+ next = draw->pipeline.aapoint;
+ }
+
+ if (wide_lines) {
+ draw->pipeline.wide_line->next = next;
+ next = draw->pipeline.wide_line;
+ precalc_flat = 1;
+ }
+
+ if (wide_points || draw->rasterizer->point_sprite) {
+ draw->pipeline.wide_point->next = next;
+ next = draw->pipeline.wide_point;
+ }
+
+ if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) {
+ draw->pipeline.stipple->next = next;
+ next = draw->pipeline.stipple;
+ precalc_flat = 1; /* only needed for lines really */
+ }
+
+ if (draw->rasterizer->poly_stipple_enable
+ && draw->pipeline.pstipple) {
+ draw->pipeline.pstipple->next = next;
+ next = draw->pipeline.pstipple;
+ }
+
+ if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) {
+ draw->pipeline.unfilled->next = next;
+ next = draw->pipeline.unfilled;
+ precalc_flat = 1; /* only needed for triangles really */
+ need_det = 1;
+ }
+
+ if (draw->rasterizer->flatshade && precalc_flat) {
+ draw->pipeline.flatshade->next = next;
+ next = draw->pipeline.flatshade;
+ }
+
+ if (draw->rasterizer->offset_cw ||
+ draw->rasterizer->offset_ccw) {
+ draw->pipeline.offset->next = next;
+ next = draw->pipeline.offset;
+ need_det = 1;
+ }
+
+ if (draw->rasterizer->light_twoside) {
+ draw->pipeline.twoside->next = next;
+ next = draw->pipeline.twoside;
+ need_det = 1;
+ }
+
+ /* Always run the cull stage as we calculate determinant there
+ * also.
+ *
+ * This can actually be a win as culling out the triangles can lead
+ * to less work emitting vertices, smaller vertex buffers, etc.
+ * It's difficult to say whether this will be true in general.
+ */
+ if (need_det || draw->rasterizer->cull_mode) {
+ draw->pipeline.cull->next = next;
+ next = draw->pipeline.cull;
+ }
+
+ /* Clip stage
+ */
+ if (!draw->bypass_clipping)
+ {
+ draw->pipeline.clip->next = next;
+ next = draw->pipeline.clip;
+ }
+
+
+ draw->pipeline.first = next;
+
+ if (0) {
+ debug_printf("draw pipeline:\n");
+ for (next = draw->pipeline.first; next ; next = next->next )
+ debug_printf(" %s\n", next->name);
+ debug_printf("\n");
+ }
+
+ return draw->pipeline.first;
+}
+
+static void validate_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct draw_stage *pipeline = validate_pipeline( stage );
+ pipeline->tri( pipeline, header );
+}
+
+static void validate_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct draw_stage *pipeline = validate_pipeline( stage );
+ pipeline->line( pipeline, header );
+}
+
+static void validate_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct draw_stage *pipeline = validate_pipeline( stage );
+ pipeline->point( pipeline, header );
+}
+
+static void validate_reset_stipple_counter( struct draw_stage *stage )
+{
+ struct draw_stage *pipeline = validate_pipeline( stage );
+ pipeline->reset_stipple_counter( pipeline );
+}
+
+static void validate_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ /* May need to pass a backend flush on to the rasterize stage.
+ */
+ if (stage->next)
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void validate_destroy( struct draw_stage *stage )
+{
+ FREE( stage );
+}
+
+
+/**
+ * Create validate pipeline stage.
+ */
+struct draw_stage *draw_validate_stage( struct draw_context *draw )
+{
+ struct draw_stage *stage = CALLOC_STRUCT(draw_stage);
+ if (stage == NULL)
+ return NULL;
+
+ stage->draw = draw;
+ stage->name = "validate";
+ stage->next = NULL;
+ stage->point = validate_point;
+ stage->line = validate_line;
+ stage->tri = validate_tri;
+ stage->flush = validate_flush;
+ stage->reset_stipple_counter = validate_reset_stipple_counter;
+ stage->destroy = validate_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
new file mode 100644
index 0000000000..1a5269c0de
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -0,0 +1,492 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Vertex buffer drawing stage.
+ *
+ * \author Jose Fonseca <jrfonsec@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw_vbuf.h"
+#include "draw_private.h"
+#include "draw_vertex.h"
+#include "draw_pipe.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+
+/**
+ * Vertex buffer emit stage.
+ */
+struct vbuf_stage {
+ struct draw_stage stage; /**< This must be first (base class) */
+
+ struct vbuf_render *render;
+
+ const struct vertex_info *vinfo;
+
+ /** Vertex size in bytes */
+ unsigned vertex_size;
+
+ struct translate *translate;
+
+ /* FIXME: we have no guarantee that 'unsigned' is 32bit */
+
+ /** Vertices in hardware format */
+ unsigned *vertices;
+ unsigned *vertex_ptr;
+ unsigned max_vertices;
+ unsigned nr_vertices;
+
+ /** Indices */
+ ushort *indices;
+ unsigned max_indices;
+ unsigned nr_indices;
+
+ /* Cache point size somewhere it's address won't change:
+ */
+ float point_size;
+
+ struct translate_cache *cache;
+};
+
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct vbuf_stage *
+vbuf_stage( struct draw_stage *stage )
+{
+ assert(stage);
+ return (struct vbuf_stage *)stage;
+}
+
+
+static void vbuf_flush_vertices( struct vbuf_stage *vbuf );
+static void vbuf_alloc_vertices( struct vbuf_stage *vbuf );
+
+
+static INLINE boolean
+overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
+{
+ unsigned long used = (unsigned long) ((char *)ptr - (char *)map);
+ return (used + bytes) > bufsz;
+}
+
+
+static INLINE void
+check_space( struct vbuf_stage *vbuf, unsigned nr )
+{
+ if (vbuf->nr_vertices + nr > vbuf->max_vertices ||
+ vbuf->nr_indices + nr > vbuf->max_indices)
+ {
+ vbuf_flush_vertices( vbuf );
+ vbuf_alloc_vertices( vbuf );
+ }
+}
+
+
+
+
+/**
+ * Extract the needed fields from post-transformed vertex and emit
+ * a hardware(driver) vertex.
+ * Recall that the vertices are constructed by the 'draw' module and
+ * have a couple of slots at the beginning (1-dword header, 4-dword
+ * clip pos) that we ignore here. We only use the vertex->data[] fields.
+ */
+static INLINE ushort
+emit_vertex( struct vbuf_stage *vbuf,
+ struct vertex_header *vertex )
+{
+ if(vertex->vertex_id == UNDEFINED_VERTEX_ID) {
+ /* Hmm - vertices are emitted one at a time - better make sure
+ * set_buffer is efficient. Consider a special one-shot mode for
+ * translate.
+ */
+ /* Note: we really do want data[0] here, not data[pos]:
+ */
+ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0);
+ vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr);
+
+ if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
+
+ vbuf->vertex_ptr += vbuf->vertex_size/4;
+ vertex->vertex_id = vbuf->nr_vertices++;
+ }
+
+ return (ushort)vertex->vertex_id;
+}
+
+
+static void
+vbuf_tri( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+ unsigned i;
+
+ check_space( vbuf, 3 );
+
+ if (vbuf->stage.draw->rasterizer->flatshade_first) {
+ /* Put provoking vertex in position expected by the driver.
+ * Emit last provoking vertex in first pos.
+ * Swap verts 0 & 1 to preserve polygon winding.
+ */
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[2] );
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] );
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[1] );
+ }
+ else {
+ for (i = 0; i < 3; i++) {
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
+ }
+ }
+}
+
+
+static void
+vbuf_line( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+ unsigned i;
+
+ check_space( vbuf, 2 );
+
+ for (i = 0; i < 2; i++) {
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
+ }
+}
+
+
+static void
+vbuf_point( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ check_space( vbuf, 1 );
+
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] );
+}
+
+
+
+
+/**
+ * Set the prim type for subsequent vertices.
+ * This may result in a new vertex size. The existing vbuffer (if any)
+ * will be flushed if needed and a new one allocated.
+ */
+static void
+vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
+{
+ struct translate_key hw_key;
+ unsigned dst_offset;
+ unsigned i;
+
+ vbuf->render->set_primitive(vbuf->render, prim);
+
+ /* Must do this after set_primitive() above:
+ *
+ * XXX: need some state managment to track when this needs to be
+ * recalculated. The driver should tell us whether there was a
+ * state change.
+ */
+ vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render);
+ vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
+
+ /* Translate from pipeline vertices to hw vertices.
+ */
+ dst_offset = 0;
+
+ for (i = 0; i < vbuf->vinfo->num_attribs; i++) {
+ unsigned emit_sz = 0;
+ unsigned src_buffer = 0;
+ unsigned output_format;
+ unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) );
+
+ switch (vbuf->vinfo->attrib[i].emit) {
+ case EMIT_4F:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit_sz = 4 * sizeof(float);
+ break;
+ case EMIT_3F:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ emit_sz = 3 * sizeof(float);
+ break;
+ case EMIT_2F:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ emit_sz = 2 * sizeof(float);
+ break;
+ case EMIT_1F:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ src_buffer = 1;
+ src_offset = 0;
+ break;
+ case EMIT_4UB:
+ output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ emit_sz = 4 * sizeof(ubyte);
+ break;
+ default:
+ assert(0);
+ output_format = PIPE_FORMAT_NONE;
+ emit_sz = 0;
+ break;
+ }
+
+ hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ hw_key.element[i].input_buffer = src_buffer;
+ hw_key.element[i].input_offset = src_offset;
+ hw_key.element[i].output_format = output_format;
+ hw_key.element[i].output_offset = dst_offset;
+
+ dst_offset += emit_sz;
+ }
+
+ hw_key.nr_elements = vbuf->vinfo->num_attribs;
+ hw_key.output_stride = vbuf->vinfo->size * 4;
+
+ /* Don't bother with caching at this stage:
+ */
+ if (!vbuf->translate ||
+ translate_key_compare(&vbuf->translate->key, &hw_key) != 0)
+ {
+ translate_key_sanitize(&hw_key);
+ vbuf->translate = translate_cache_find(vbuf->cache, &hw_key);
+
+ vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0);
+ }
+
+ vbuf->point_size = vbuf->stage.draw->rasterizer->point_size;
+
+ /* Allocate new buffer?
+ */
+ assert(vbuf->vertices == NULL);
+ vbuf_alloc_vertices(vbuf);
+}
+
+
+static void
+vbuf_first_tri( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ vbuf_flush_vertices( vbuf );
+ vbuf_start_prim(vbuf, PIPE_PRIM_TRIANGLES);
+ stage->tri = vbuf_tri;
+ stage->tri( stage, prim );
+}
+
+
+static void
+vbuf_first_line( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ vbuf_flush_vertices( vbuf );
+ vbuf_start_prim(vbuf, PIPE_PRIM_LINES);
+ stage->line = vbuf_line;
+ stage->line( stage, prim );
+}
+
+
+static void
+vbuf_first_point( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ vbuf_flush_vertices(vbuf);
+ vbuf_start_prim(vbuf, PIPE_PRIM_POINTS);
+ stage->point = vbuf_point;
+ stage->point( stage, prim );
+}
+
+
+
+/**
+ * Flush existing vertex buffer and allocate a new one.
+ */
+static void
+vbuf_flush_vertices( struct vbuf_stage *vbuf )
+{
+ if(vbuf->vertices) {
+
+ vbuf->render->unmap_vertices( vbuf->render, 0, vbuf->nr_vertices - 1 );
+
+ if (vbuf->nr_indices)
+ {
+ vbuf->render->draw(vbuf->render,
+ vbuf->indices,
+ vbuf->nr_indices );
+
+ vbuf->nr_indices = 0;
+ }
+
+ /* Reset temporary vertices ids */
+ if(vbuf->nr_vertices)
+ draw_reset_vertex_ids( vbuf->stage.draw );
+
+ /* Free the vertex buffer */
+ vbuf->render->release_vertices( vbuf->render );
+
+ vbuf->max_vertices = vbuf->nr_vertices = 0;
+ vbuf->vertex_ptr = vbuf->vertices = NULL;
+ }
+}
+
+
+static void
+vbuf_alloc_vertices( struct vbuf_stage *vbuf )
+{
+ assert(!vbuf->nr_indices);
+ assert(!vbuf->vertices);
+
+ /* Allocate a new vertex buffer */
+ vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
+
+ /* even number */
+ vbuf->max_vertices = vbuf->max_vertices & ~1;
+
+ if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
+ vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
+
+ /* Must always succeed -- driver gives us a
+ * 'max_vertex_buffer_bytes' which it guarantees it can allocate,
+ * and it will flush itself if necessary to do so. If this does
+ * fail, we are basically without usable hardware.
+ */
+ vbuf->render->allocate_vertices(vbuf->render,
+ (ushort) vbuf->vertex_size,
+ (ushort) vbuf->max_vertices);
+
+ vbuf->vertices = (uint *) vbuf->render->map_vertices( vbuf->render );
+
+ vbuf->vertex_ptr = vbuf->vertices;
+}
+
+
+
+static void
+vbuf_flush( struct draw_stage *stage, unsigned flags )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ vbuf_flush_vertices( vbuf );
+
+ stage->point = vbuf_first_point;
+ stage->line = vbuf_first_line;
+ stage->tri = vbuf_first_tri;
+}
+
+
+static void
+vbuf_reset_stipple_counter( struct draw_stage *stage )
+{
+ /* XXX: Need to do something here for hardware with linestipple.
+ */
+ (void) stage;
+}
+
+
+static void vbuf_destroy( struct draw_stage *stage )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ if(vbuf->indices)
+ align_free( vbuf->indices );
+
+ if (vbuf->render)
+ vbuf->render->destroy( vbuf->render );
+
+ if (vbuf->cache)
+ translate_cache_destroy(vbuf->cache);
+
+ FREE( stage );
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ */
+struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
+ struct vbuf_render *render )
+{
+ struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
+ if (vbuf == NULL)
+ goto fail;
+
+ vbuf->stage.draw = draw;
+ vbuf->stage.name = "vbuf";
+ vbuf->stage.point = vbuf_first_point;
+ vbuf->stage.line = vbuf_first_line;
+ vbuf->stage.tri = vbuf_first_tri;
+ vbuf->stage.flush = vbuf_flush;
+ vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter;
+ vbuf->stage.destroy = vbuf_destroy;
+
+ vbuf->render = render;
+ vbuf->max_indices = MAX2(render->max_indices, UNDEFINED_VERTEX_ID-1);
+
+ vbuf->indices = (ushort *) align_malloc( vbuf->max_indices *
+ sizeof(vbuf->indices[0]),
+ 16 );
+ if (!vbuf->indices)
+ goto fail;
+
+ vbuf->cache = translate_cache_create();
+ if (!vbuf->cache)
+ goto fail;
+
+
+ vbuf->vertices = NULL;
+ vbuf->vertex_ptr = vbuf->vertices;
+
+ return &vbuf->stage;
+
+ fail:
+ if (vbuf)
+ vbuf_destroy(&vbuf->stage);
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
new file mode 100644
index 0000000000..f32cbef983
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -0,0 +1,181 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "draw_private.h"
+#include "draw_pipe.h"
+
+
+struct wideline_stage {
+ struct draw_stage stage;
+
+ float half_line_width;
+};
+
+
+
+static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage )
+{
+ return (struct wideline_stage *)stage;
+}
+
+
+
+/**
+ * Draw a wide line by drawing a quad (two triangles).
+ * XXX need to disable polygon stipple.
+ */
+static void wideline_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ /*const struct wideline_stage *wide = wideline_stage(stage);*/
+ const unsigned pos = stage->draw->vs.position_output;
+ const float half_width = 0.5f * stage->draw->rasterizer->line_width;
+
+ struct prim_header tri;
+
+ struct vertex_header *v0 = dup_vert(stage, header->v[0], 0);
+ struct vertex_header *v1 = dup_vert(stage, header->v[0], 1);
+ struct vertex_header *v2 = dup_vert(stage, header->v[1], 2);
+ struct vertex_header *v3 = dup_vert(stage, header->v[1], 3);
+
+ float *pos0 = v0->data[pos];
+ float *pos1 = v1->data[pos];
+ float *pos2 = v2->data[pos];
+ float *pos3 = v3->data[pos];
+
+ const float dx = fabsf(pos0[0] - pos2[0]);
+ const float dy = fabsf(pos0[1] - pos2[1]);
+
+ /* small tweak to meet GL specification */
+ const float bias = 0.125f;
+
+ /*
+ * Draw wide line as a quad (two tris) by "stretching" the line along
+ * X or Y.
+ * We need to tweak coords in several ways to be conformant here.
+ */
+
+ if (dx > dy) {
+ /* x-major line */
+ pos0[1] = pos0[1] - half_width - bias;
+ pos1[1] = pos1[1] + half_width - bias;
+ pos2[1] = pos2[1] - half_width - bias;
+ pos3[1] = pos3[1] + half_width - bias;
+ if (pos0[0] < pos2[0]) {
+ /* left to right line */
+ pos0[0] -= 0.5f;
+ pos1[0] -= 0.5f;
+ pos2[0] -= 0.5f;
+ pos3[0] -= 0.5f;
+ }
+ else {
+ /* right to left line */
+ pos0[0] += 0.5f;
+ pos1[0] += 0.5f;
+ pos2[0] += 0.5f;
+ pos3[0] += 0.5f;
+ }
+ }
+ else {
+ /* y-major line */
+ pos0[0] = pos0[0] - half_width + bias;
+ pos1[0] = pos1[0] + half_width + bias;
+ pos2[0] = pos2[0] - half_width + bias;
+ pos3[0] = pos3[0] + half_width + bias;
+ if (pos0[1] < pos2[1]) {
+ /* top to bottom line */
+ pos0[1] -= 0.5f;
+ pos1[1] -= 0.5f;
+ pos2[1] -= 0.5f;
+ pos3[1] -= 0.5f;
+ }
+ else {
+ /* bottom to top line */
+ pos0[1] += 0.5f;
+ pos1[1] += 0.5f;
+ pos2[1] += 0.5f;
+ pos3[1] += 0.5f;
+ }
+ }
+
+ tri.det = header->det; /* only the sign matters */
+ tri.v[0] = v0;
+ tri.v[1] = v2;
+ tri.v[2] = v3;
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v0;
+ tri.v[1] = v3;
+ tri.v[2] = v1;
+ stage->next->tri( stage->next, &tri );
+}
+
+
+static void wideline_flush( struct draw_stage *stage, unsigned flags )
+{
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void wideline_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void wideline_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+struct draw_stage *draw_wide_line_stage( struct draw_context *draw )
+{
+ struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage);
+
+ draw_alloc_temp_verts( &wide->stage, 4 );
+
+ wide->stage.draw = draw;
+ wide->stage.name = "wide-line";
+ wide->stage.next = NULL;
+ wide->stage.point = draw_pipe_passthrough_point;
+ wide->stage.line = wideline_line;
+ wide->stage.tri = draw_pipe_passthrough_tri;
+ wide->stage.flush = wideline_flush;
+ wide->stage.reset_stipple_counter = wideline_reset_stipple_counter;
+ wide->stage.destroy = wideline_destroy;
+
+ return &wide->stage;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
new file mode 100644
index 0000000000..7d76a7dbf3
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -0,0 +1,333 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/**
+ * Notes on wide points and sprite mode:
+ *
+ * In wide point/sprite mode we effectively need to convert each incoming
+ * vertex into four outgoing vertices specifying the corners of a quad.
+ * Since we don't (yet) have geometry shaders, we have to handle this here
+ * in the draw module.
+ *
+ * For sprites, it also means that this is where we have to handle texcoords
+ * for the vertices of the quad. OpenGL's GL_COORD_REPLACE state specifies
+ * if/how enabled texcoords are automatically generated for sprites. We pass
+ * that info through gallium in the pipe_rasterizer_state::sprite_coord_mode
+ * array.
+ *
+ * Additionally, GLSL's gl_PointCoord fragment attribute has to be handled
+ * here as well. This is basically an additional texture/generic attribute
+ * that varies .x from 0 to 1 horizontally across the point and varies .y
+ * vertically from 0 to 1 down the sprite.
+ *
+ * With geometry shaders, the state tracker could create a GS to do
+ * most/all of this.
+ */
+
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
+
+
+struct widepoint_stage {
+ struct draw_stage stage;
+
+ float half_point_size;
+ float point_size_min;
+ float point_size_max;
+
+ float xbias;
+ float ybias;
+
+ uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS];
+ uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS];
+ uint num_texcoords;
+
+ int psize_slot;
+
+ int point_coord_fs_input; /**< input for pointcoord */
+};
+
+
+
+static INLINE struct widepoint_stage *
+widepoint_stage( struct draw_stage *stage )
+{
+ return (struct widepoint_stage *)stage;
+}
+
+
+/**
+ * Set the vertex texcoords for sprite mode.
+ * Coords may be left untouched or set to a right-side-up or upside-down
+ * orientation.
+ */
+static void set_texcoords(const struct widepoint_stage *wide,
+ struct vertex_header *v, const float tc[4])
+{
+ uint i;
+ for (i = 0; i < wide->num_texcoords; i++) {
+ if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) {
+ uint j = wide->texcoord_slot[i];
+ v->data[j][0] = tc[0];
+ if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT)
+ v->data[j][1] = 1.0f - tc[1];
+ else
+ v->data[j][1] = tc[1];
+ v->data[j][2] = tc[2];
+ v->data[j][3] = tc[3];
+ }
+ }
+
+ if (wide->point_coord_fs_input >= 0) {
+ /* put gl_PointCoord into the extra vertex slot */
+ uint slot = wide->stage.draw->extra_vp_outputs.slot;
+ v->data[slot][0] = tc[0];
+ v->data[slot][1] = tc[1];
+ v->data[slot][2] = 0.0F;
+ v->data[slot][3] = 1.0F;
+ }
+}
+
+
+/* If there are lots of sprite points (and why wouldn't there be?) it
+ * would probably be more sensible to change hardware setup to
+ * optimize this rather than doing the whole thing in software like
+ * this.
+ */
+static void widepoint_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ const struct widepoint_stage *wide = widepoint_stage(stage);
+ const unsigned pos = stage->draw->vs.position_output;
+ const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite;
+ float half_size;
+ float left_adj, right_adj, bot_adj, top_adj;
+
+ struct prim_header tri;
+
+ /* four dups of original vertex */
+ struct vertex_header *v0 = dup_vert(stage, header->v[0], 0);
+ struct vertex_header *v1 = dup_vert(stage, header->v[0], 1);
+ struct vertex_header *v2 = dup_vert(stage, header->v[0], 2);
+ struct vertex_header *v3 = dup_vert(stage, header->v[0], 3);
+
+ float *pos0 = v0->data[pos];
+ float *pos1 = v1->data[pos];
+ float *pos2 = v2->data[pos];
+ float *pos3 = v3->data[pos];
+
+ /* point size is either per-vertex or fixed size */
+ if (wide->psize_slot >= 0) {
+ half_size = header->v[0]->data[wide->psize_slot][0];
+
+ /* XXX: temporary -- do this in the vertex shader??
+ */
+ half_size = CLAMP(half_size,
+ wide->point_size_min,
+ wide->point_size_max);
+
+ half_size *= 0.5f;
+ }
+ else {
+ half_size = wide->half_point_size;
+ }
+
+ left_adj = -half_size + wide->xbias;
+ right_adj = half_size + wide->xbias;
+ bot_adj = half_size + wide->ybias;
+ top_adj = -half_size + wide->ybias;
+
+ pos0[0] += left_adj;
+ pos0[1] += top_adj;
+
+ pos1[0] += left_adj;
+ pos1[1] += bot_adj;
+
+ pos2[0] += right_adj;
+ pos2[1] += top_adj;
+
+ pos3[0] += right_adj;
+ pos3[1] += bot_adj;
+
+ if (sprite) {
+ static const float tex00[4] = { 0, 0, 0, 1 };
+ static const float tex01[4] = { 0, 1, 0, 1 };
+ static const float tex11[4] = { 1, 1, 0, 1 };
+ static const float tex10[4] = { 1, 0, 0, 1 };
+ set_texcoords( wide, v0, tex00 );
+ set_texcoords( wide, v1, tex01 );
+ set_texcoords( wide, v2, tex10 );
+ set_texcoords( wide, v3, tex11 );
+ }
+
+ tri.det = header->det; /* only the sign matters */
+ tri.v[0] = v0;
+ tri.v[1] = v2;
+ tri.v[2] = v3;
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v0;
+ tri.v[1] = v3;
+ tri.v[2] = v1;
+ stage->next->tri( stage->next, &tri );
+}
+
+
+static int
+find_pntc_input_attrib(struct draw_context *draw)
+{
+ /* Scan the fragment program's input decls to find the pointcoord
+ * attribute. The xy components will store the point coord.
+ */
+ return 0; /* XXX fix this */
+}
+
+
+static void widepoint_first_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct widepoint_stage *wide = widepoint_stage(stage);
+ struct draw_context *draw = stage->draw;
+
+ wide->half_point_size = 0.5f * draw->rasterizer->point_size;
+ wide->point_size_min = draw->rasterizer->point_size_min;
+ wide->point_size_max = draw->rasterizer->point_size_max;
+ wide->xbias = 0.0;
+ wide->ybias = 0.0;
+
+ if (draw->rasterizer->gl_rasterization_rules) {
+ wide->xbias = 0.125;
+ }
+
+ /* XXX we won't know the real size if it's computed by the vertex shader! */
+ if ((draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) ||
+ (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)) {
+ stage->point = widepoint_point;
+ }
+ else {
+ stage->point = draw_pipe_passthrough_point;
+ }
+
+ if (draw->rasterizer->point_sprite) {
+ /* find vertex shader texcoord outputs */
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ uint i, j = 0;
+ for (i = 0; i < vs->info.num_outputs; i++) {
+ if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
+ wide->texcoord_slot[j] = i;
+ wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j];
+ j++;
+ }
+ }
+ wide->num_texcoords = j;
+
+ /* find fragment shader PointCoord input */
+ wide->point_coord_fs_input = find_pntc_input_attrib(draw);
+
+ /* setup extra vp output (point coord implemented as a texcoord) */
+ draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
+ draw->extra_vp_outputs.semantic_index = 0;
+ draw->extra_vp_outputs.slot = draw->vs.num_vs_outputs;
+ }
+ else {
+ wide->point_coord_fs_input = -1;
+ draw->extra_vp_outputs.slot = 0;
+ }
+
+ wide->psize_slot = -1;
+ if (draw->rasterizer->point_size_per_vertex) {
+ /* find PSIZ vertex output */
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ uint i;
+ for (i = 0; i < vs->info.num_outputs; i++) {
+ if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
+ wide->psize_slot = i;
+ break;
+ }
+ }
+ }
+
+ stage->point( stage, header );
+}
+
+
+static void widepoint_flush( struct draw_stage *stage, unsigned flags )
+{
+ stage->point = widepoint_first_point;
+ stage->next->flush( stage->next, flags );
+ stage->draw->extra_vp_outputs.slot = 0;
+}
+
+
+static void widepoint_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void widepoint_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
+{
+ struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage);
+ if (wide == NULL)
+ goto fail;
+
+ if (!draw_alloc_temp_verts( &wide->stage, 4 ))
+ goto fail;
+
+ wide->stage.draw = draw;
+ wide->stage.name = "wide-point";
+ wide->stage.next = NULL;
+ wide->stage.point = widepoint_first_point;
+ wide->stage.line = draw_pipe_passthrough_line;
+ wide->stage.tri = draw_pipe_passthrough_tri;
+ wide->stage.flush = widepoint_flush;
+ wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter;
+ wide->stage.destroy = widepoint_destroy;
+
+ return &wide->stage;
+
+ fail:
+ if (wide)
+ wide->stage.destroy( &wide->stage );
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
new file mode 100644
index 0000000000..41fcb16a0a
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -0,0 +1,319 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Private data structures, etc for the draw module.
+ */
+
+
+/**
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+
+#ifndef DRAW_PRIVATE_H
+#define DRAW_PRIVATE_H
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+
+#include "tgsi/tgsi_scan.h"
+
+
+struct pipe_context;
+struct gallivm_prog;
+struct gallivm_cpu_engine;
+struct draw_vertex_shader;
+struct draw_context;
+struct draw_stage;
+struct vbuf_render;
+struct tgsi_exec_machine;
+struct tgsi_sampler;
+
+
+/**
+ * Basic vertex info.
+ * Carry some useful information around with the vertices in the prim pipe.
+ */
+struct vertex_header {
+ unsigned clipmask:12;
+ unsigned edgeflag:1;
+ unsigned pad:3;
+ unsigned vertex_id:16;
+
+ float clip[4];
+
+ /* This will probably become float (*data)[4] soon:
+ */
+ float data[][4];
+};
+
+/* NOTE: It should match vertex_id size above */
+#define UNDEFINED_VERTEX_ID 0xffff
+
+
+/**
+ * Private context for the drawing module.
+ */
+struct draw_context
+{
+ /** Drawing/primitive pipeline stages */
+ struct {
+ struct draw_stage *first; /**< one of the following */
+
+ struct draw_stage *validate;
+
+ /* stages (in logical order) */
+ struct draw_stage *flatshade;
+ struct draw_stage *clip;
+ struct draw_stage *cull;
+ struct draw_stage *twoside;
+ struct draw_stage *offset;
+ struct draw_stage *unfilled;
+ struct draw_stage *stipple;
+ struct draw_stage *aapoint;
+ struct draw_stage *aaline;
+ struct draw_stage *pstipple;
+ struct draw_stage *wide_line;
+ struct draw_stage *wide_point;
+ struct draw_stage *rasterize;
+
+ float wide_point_threshold; /**< convert pnts to tris if larger than this */
+ float wide_line_threshold; /**< convert lines to tris if wider than this */
+ boolean line_stipple; /**< do line stipple? */
+ boolean point_sprite; /**< convert points to quads for sprites? */
+
+ /* Temporary storage while the pipeline is being run:
+ */
+ char *verts;
+ unsigned vertex_stride;
+ unsigned vertex_count;
+ } pipeline;
+
+
+ struct vbuf_render *render;
+
+ /* Support prototype passthrough path:
+ */
+ struct {
+ struct {
+ struct draw_pt_middle_end *fetch_emit;
+ struct draw_pt_middle_end *fetch_shade_emit;
+ struct draw_pt_middle_end *general;
+ } middle;
+
+ struct {
+ struct draw_pt_front_end *vcache;
+ struct draw_pt_front_end *varray;
+ } front;
+
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_buffers;
+
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_elements;
+
+ /* user-space vertex data, buffers */
+ struct {
+ const unsigned *edgeflag;
+
+ /** vertex element/index buffer (ex: glDrawElements) */
+ const void *elts;
+ /** bytes per index (0, 1, 2 or 4) */
+ unsigned eltSize;
+ unsigned min_index;
+ unsigned max_index;
+
+ /** vertex arrays */
+ const void *vbuffer[PIPE_MAX_ATTRIBS];
+
+ /** constant buffer (for vertex shader) */
+ const void *constants;
+ } user;
+
+ boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */
+ boolean no_fse; /* disable FSE even when it is correct */
+ } pt;
+
+ struct {
+ boolean bypass_clipping;
+ boolean bypass_vs;
+ } driver;
+
+ boolean flushing; /**< debugging/sanity */
+ boolean suspend_flushing; /**< internally set */
+ boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */
+
+ boolean force_passthrough; /**< never clip or shade */
+
+ double mrd; /**< minimum resolvable depth value, for polygon offset */
+
+ /* pipe state that we need: */
+ const struct pipe_rasterizer_state *rasterizer;
+ struct pipe_viewport_state viewport;
+ boolean identity_viewport;
+
+ struct {
+ struct draw_vertex_shader *vertex_shader;
+ uint num_vs_outputs; /**< convenience, from vertex_shader */
+ uint position_output;
+
+ /** TGSI program interpreter runtime state */
+ struct tgsi_exec_machine *machine;
+
+ uint num_samplers;
+ struct tgsi_sampler **samplers;
+
+ /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
+ */
+ struct gallivm_cpu_engine *engine;
+
+ /* Here's another one:
+ */
+ struct aos_machine *aos_machine;
+
+
+ const float (*aligned_constants)[4];
+
+ const float (*aligned_constant_storage)[4];
+ unsigned const_storage_size;
+
+
+ struct translate *fetch;
+ struct translate_cache *fetch_cache;
+ struct translate *emit;
+ struct translate_cache *emit_cache;
+ } vs;
+
+ /* Clip derived state:
+ */
+ float plane[12][4];
+ unsigned nr_planes;
+
+ /* If a prim stage introduces new vertex attributes, they'll be stored here
+ */
+ struct {
+ uint semantic_name;
+ uint semantic_index;
+ int slot;
+ } extra_vp_outputs;
+
+ unsigned reduced_prim;
+
+ void *driver_private;
+};
+
+
+/*******************************************************************************
+ * Vertex shader code:
+ */
+boolean draw_vs_init( struct draw_context *draw );
+void draw_vs_destroy( struct draw_context *draw );
+
+void draw_vs_set_viewport( struct draw_context *,
+ const struct pipe_viewport_state * );
+
+void draw_vs_set_constants( struct draw_context *,
+ const float (*constants)[4],
+ unsigned size );
+
+
+
+
+/*******************************************************************************
+ * Vertex processing (was passthrough) code:
+ */
+boolean draw_pt_init( struct draw_context *draw );
+void draw_pt_destroy( struct draw_context *draw );
+void draw_pt_reset_vertex_ids( struct draw_context *draw );
+
+
+/*******************************************************************************
+ * Primitive processing (pipeline) code:
+ */
+
+boolean draw_pipeline_init( struct draw_context *draw );
+void draw_pipeline_destroy( struct draw_context *draw );
+
+
+
+
+
+/* We use the top few bits in the elts[] parameter to convey a little
+ * API information. This limits the number of vertices we can address
+ * to only 4096 -- if that becomes a problem, we can switch to 32-bit
+ * draw indices.
+ *
+ * These flags expected at first vertex of lines & triangles when
+ * unfilled and/or line stipple modes are operational.
+ */
+#define DRAW_PIPE_MAX_VERTICES (0x1<<12)
+#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12)
+#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12)
+#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12)
+#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12)
+#define DRAW_PIPE_RESET_STIPPLE (0x8<<12)
+#define DRAW_PIPE_FLAG_MASK (0xf<<12)
+
+void draw_pipeline_run( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
+ unsigned count );
+
+void draw_pipeline_run_linear( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride );
+
+
+
+void draw_pipeline_flush( struct draw_context *draw,
+ unsigned flags );
+
+
+
+/*******************************************************************************
+ * Flushing
+ */
+
+#define DRAW_FLUSH_STATE_CHANGE 0x8
+#define DRAW_FLUSH_BACKEND 0x10
+
+
+void draw_do_flush( struct draw_context *draw, unsigned flags );
+
+
+
+
+#endif /* DRAW_PRIVATE_H */
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
new file mode 100644
index 0000000000..dbb5ac7182
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -0,0 +1,324 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_math.h"
+#include "util/u_prim.h"
+
+static unsigned trim( unsigned count, unsigned first, unsigned incr )
+{
+ if (count < first)
+ return 0;
+ return count - (count - first) % incr;
+}
+
+
+
+/* Overall we split things into:
+ * - frontend -- prepare fetch_elts, draw_elts - eg vcache
+ * - middle -- fetch, shade, cliptest, viewport
+ * - pipeline -- the prim pipeline: clipping, wide lines, etc
+ * - backend -- the vbuf_render provided by the driver.
+ */
+static boolean
+draw_pt_arrays(struct draw_context *draw,
+ unsigned prim,
+ unsigned start,
+ unsigned count)
+{
+ struct draw_pt_front_end *frontend = NULL;
+ struct draw_pt_middle_end *middle = NULL;
+ unsigned opt = 0;
+
+ /* Sanitize primitive length:
+ */
+ {
+ unsigned first, incr;
+ draw_pt_split_prim(prim, &first, &incr);
+ count = trim(count, first, incr);
+ if (count < first)
+ return TRUE;
+ }
+
+ if (!draw->force_passthrough) {
+ if (!draw->render) {
+ opt |= PT_PIPELINE;
+ }
+
+ if (draw_need_pipeline(draw,
+ draw->rasterizer,
+ prim)) {
+ opt |= PT_PIPELINE;
+ }
+
+ if (!draw->bypass_clipping && !draw->pt.test_fse) {
+ opt |= PT_CLIPTEST;
+ }
+
+ if (!draw->rasterizer->bypass_vs_clip_and_viewport) {
+ opt |= PT_SHADE;
+ }
+ }
+
+ if (opt == 0)
+ middle = draw->pt.middle.fetch_emit;
+ else if (opt == PT_SHADE && !draw->pt.no_fse)
+ middle = draw->pt.middle.fetch_shade_emit;
+ else
+ middle = draw->pt.middle.general;
+
+
+ /* Pick the right frontend
+ */
+ if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
+ frontend = draw->pt.front.vcache;
+ } else {
+ frontend = draw->pt.front.varray;
+ }
+
+ frontend->prepare( frontend, prim, middle, opt );
+
+ frontend->run(frontend,
+ draw_pt_elt_func(draw),
+ draw_pt_elt_ptr(draw, start),
+ count);
+
+ frontend->finish( frontend );
+
+ return TRUE;
+}
+
+
+boolean draw_pt_init( struct draw_context *draw )
+{
+ draw->pt.test_fse = debug_get_bool_option("DRAW_FSE", FALSE);
+ draw->pt.no_fse = debug_get_bool_option("DRAW_NO_FSE", FALSE);
+
+ draw->pt.front.vcache = draw_pt_vcache( draw );
+ if (!draw->pt.front.vcache)
+ return FALSE;
+
+ draw->pt.front.varray = draw_pt_varray(draw);
+ if (!draw->pt.front.varray)
+ return FALSE;
+
+ draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
+ if (!draw->pt.middle.fetch_emit)
+ return FALSE;
+
+ draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
+ if (!draw->pt.middle.fetch_shade_emit)
+ return FALSE;
+
+ draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
+ if (!draw->pt.middle.general)
+ return FALSE;
+
+ return TRUE;
+}
+
+
+void draw_pt_destroy( struct draw_context *draw )
+{
+ if (draw->pt.middle.general) {
+ draw->pt.middle.general->destroy( draw->pt.middle.general );
+ draw->pt.middle.general = NULL;
+ }
+
+ if (draw->pt.middle.fetch_emit) {
+ draw->pt.middle.fetch_emit->destroy( draw->pt.middle.fetch_emit );
+ draw->pt.middle.fetch_emit = NULL;
+ }
+
+ if (draw->pt.middle.fetch_shade_emit) {
+ draw->pt.middle.fetch_shade_emit->destroy( draw->pt.middle.fetch_shade_emit );
+ draw->pt.middle.fetch_shade_emit = NULL;
+ }
+
+ if (draw->pt.front.vcache) {
+ draw->pt.front.vcache->destroy( draw->pt.front.vcache );
+ draw->pt.front.vcache = NULL;
+ }
+
+ if (draw->pt.front.varray) {
+ draw->pt.front.varray->destroy( draw->pt.front.varray );
+ draw->pt.front.varray = NULL;
+ }
+}
+
+
+/**
+ * Debug- print the first 'count' vertices.
+ */
+static void
+draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
+{
+ uint i;
+
+ debug_printf("Draw arrays(prim = %u, start = %u, count = %u)\n",
+ prim, start, count);
+
+ for (i = 0; i < count; i++) {
+ uint ii, j;
+
+ if (draw->pt.user.elts) {
+ /* indexed arrays */
+ switch (draw->pt.user.eltSize) {
+ case 1:
+ {
+ const ubyte *elem = (const ubyte *) draw->pt.user.elts;
+ ii = elem[start + i];
+ }
+ break;
+ case 2:
+ {
+ const ushort *elem = (const ushort *) draw->pt.user.elts;
+ ii = elem[start + i];
+ }
+ break;
+ case 4:
+ {
+ const uint *elem = (const uint *) draw->pt.user.elts;
+ ii = elem[start + i];
+ }
+ break;
+ default:
+ assert(0);
+ }
+ debug_printf("Element[%u + %u] -> Vertex %u:\n", start, i, ii);
+ }
+ else {
+ /* non-indexed arrays */
+ ii = start + i;
+ debug_printf("Vertex %u:\n", ii);
+ }
+
+ for (j = 0; j < draw->pt.nr_vertex_elements; j++) {
+ uint buf = draw->pt.vertex_element[j].vertex_buffer_index;
+ ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf];
+ ptr += draw->pt.vertex_buffer[buf].stride * ii;
+ ptr += draw->pt.vertex_element[j].src_offset;
+
+ debug_printf(" Attr %u: ", j);
+ switch (draw->pt.vertex_element[j].src_format) {
+ case PIPE_FORMAT_R32_FLOAT:
+ {
+ float *v = (float *) ptr;
+ debug_printf("%f @ %p\n", v[0], (void *) v);
+ }
+ break;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ {
+ float *v = (float *) ptr;
+ debug_printf("%f %f @ %p\n", v[0], v[1], (void *) v);
+ }
+ break;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ {
+ float *v = (float *) ptr;
+ debug_printf("%f %f %f @ %p\n", v[0], v[1], v[2], (void *) v);
+ }
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ {
+ float *v = (float *) ptr;
+ debug_printf("%f %f %f %f @ %p\n", v[0], v[1], v[2], v[3],
+ (void *) v);
+ }
+ break;
+ default:
+ debug_printf("other format (fix me)\n");
+ ;
+ }
+ }
+ }
+}
+
+
+/**
+ * Draw vertex arrays
+ * This is the main entrypoint into the drawing module.
+ * \param prim one of PIPE_PRIM_x
+ * \param start index of first vertex to draw
+ * \param count number of vertices to draw
+ */
+void
+draw_arrays(struct draw_context *draw, unsigned prim,
+ unsigned start, unsigned count)
+{
+ unsigned reduced_prim = u_reduced_prim(prim);
+ if (reduced_prim != draw->reduced_prim) {
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->reduced_prim = reduced_prim;
+ }
+
+ if (0)
+ draw_print_arrays(draw, prim, start, MIN2(count, 20));
+
+#if 0
+ {
+ int i;
+ debug_printf("draw_arrays(prim=%u start=%u count=%u):\n",
+ prim, start, count);
+ tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
+ debug_printf("Elements:\n");
+ for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+ debug_printf(" format=%s comps=%u\n",
+ pf_name(draw->pt.vertex_element[i].src_format),
+ draw->pt.vertex_element[i].nr_components);
+ }
+ debug_printf("Buffers:\n");
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ debug_printf(" stride=%u offset=%u ptr=%p\n",
+ draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].buffer_offset,
+ draw->pt.user.vbuffer[i]);
+ }
+ }
+#endif
+
+ /* drawing done here: */
+ draw_pt_arrays(draw, prim, start, count);
+}
+
+boolean draw_pt_get_edgeflag( struct draw_context *draw,
+ unsigned idx )
+{
+ if (draw->pt.user.edgeflag)
+ return (draw->pt.user.edgeflag[idx/32] & (1 << (idx%32))) != 0;
+ else
+ return 1;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
new file mode 100644
index 0000000000..7a17a9fb6b
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -0,0 +1,233 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef DRAW_PT_H
+#define DRAW_PT_H
+
+#include "pipe/p_compiler.h"
+
+typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx );
+
+struct draw_pt_middle_end;
+struct draw_context;
+
+
+#define PT_SHADE 0x1
+#define PT_CLIPTEST 0x2
+#define PT_PIPELINE 0x4
+#define PT_MAX_MIDDLE 0x8
+
+
+/* The "front end" - prepare sets of fetch, draw elements for the
+ * middle end.
+ *
+ * Currenly one version of this:
+ * - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims
+ * Later:
+ * - varray, varray_split
+ * - velement, velement_split
+ *
+ * Currenly only using the vcache version.
+ */
+struct draw_pt_front_end {
+ void (*prepare)( struct draw_pt_front_end *,
+ unsigned prim,
+ struct draw_pt_middle_end *,
+ unsigned opt );
+
+ void (*run)( struct draw_pt_front_end *,
+ pt_elt_func elt_func,
+ const void *elt_ptr,
+ unsigned count );
+
+ void (*finish)( struct draw_pt_front_end * );
+ void (*destroy)( struct draw_pt_front_end * );
+};
+
+
+/* The "middle end" - prepares actual hardware vertices for the
+ * hardware backend.
+ *
+ * Currently two versions of this:
+ * - fetch, vertex shade, cliptest, prim-pipeline
+ * - fetch, emit (ie passthrough)
+ */
+struct draw_pt_middle_end {
+ void (*prepare)( struct draw_pt_middle_end *,
+ unsigned prim,
+ unsigned opt,
+ unsigned *max_vertices );
+
+ void (*run)( struct draw_pt_middle_end *,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count );
+
+ void (*run_linear)(struct draw_pt_middle_end *,
+ unsigned start,
+ unsigned count);
+
+ /* Transform all vertices in a linear range and then draw them with
+ * the supplied element list. May fail and return FALSE.
+ */
+ boolean (*run_linear_elts)( struct draw_pt_middle_end *,
+ unsigned fetch_start,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count );
+
+ int (*get_max_vertex_count)( struct draw_pt_middle_end * );
+
+ void (*finish)( struct draw_pt_middle_end * );
+ void (*destroy)( struct draw_pt_middle_end * );
+};
+
+
+/* The "back end" - supplied by the driver, defined in draw_vbuf.h.
+ */
+struct vbuf_render;
+struct vertex_header;
+
+
+/* Helper functions.
+ */
+pt_elt_func draw_pt_elt_func( struct draw_context *draw );
+const void *draw_pt_elt_ptr( struct draw_context *draw,
+ unsigned start );
+
+/* Frontends:
+ *
+ * Currently only the general-purpose vcache implementation, could add
+ * a special case for tiny vertex buffers.
+ */
+struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
+struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
+
+
+/* Middle-ends:
+ *
+ * Currently one general-purpose case which can do all possibilities,
+ * at the slight expense of creating a vertex_header in some cases
+ * unecessarily.
+ *
+ * The special case fetch_emit code avoids pipeline vertices
+ * altogether and builds hardware vertices directly from API
+ * vertex_elements.
+ */
+struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
+struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw );
+struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
+
+
+/* More helpers:
+ */
+boolean draw_pt_get_edgeflag( struct draw_context *draw,
+ unsigned idx );
+
+
+/*******************************************************************************
+ * HW vertex emit:
+ */
+struct pt_emit;
+
+void draw_pt_emit_prepare( struct pt_emit *emit,
+ unsigned prim,
+ unsigned *max_vertices );
+
+void draw_pt_emit( struct pt_emit *emit,
+ const float (*vertex_data)[4],
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
+ unsigned count );
+
+void draw_pt_emit_linear( struct pt_emit *emit,
+ const float (*vertex_data)[4],
+ unsigned stride,
+ unsigned count );
+
+void draw_pt_emit_destroy( struct pt_emit *emit );
+
+struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
+
+
+/*******************************************************************************
+ * API vertex fetch:
+ */
+
+struct pt_fetch;
+void draw_pt_fetch_prepare( struct pt_fetch *fetch,
+ unsigned vertex_input_count,
+ unsigned vertex_size );
+
+void draw_pt_fetch_run( struct pt_fetch *fetch,
+ const unsigned *elts,
+ unsigned count,
+ char *verts );
+
+void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
+ unsigned start,
+ unsigned count,
+ char *verts );
+
+void draw_pt_fetch_destroy( struct pt_fetch *fetch );
+
+struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );
+
+/*******************************************************************************
+ * Post-VS: cliptest, rhw, viewport
+ */
+struct pt_post_vs;
+
+boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
+ struct vertex_header *pipeline_verts,
+ unsigned stride,
+ unsigned count );
+
+void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
+ boolean bypass_clipping,
+ boolean bypass_viewport,
+ boolean opengl );
+
+struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );
+
+void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
+
+
+/*******************************************************************************
+ * Utils:
+ */
+void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
+
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h
new file mode 100644
index 0000000000..4ca5b52020
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h
@@ -0,0 +1,182 @@
+
+
+static void FUNC( ARGS,
+ unsigned count )
+{
+ LOCAL_VARS;
+
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i ++) {
+ POINT( (i + 0) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 0; i+1 < count; i += 2) {
+ LINE( DRAW_PIPE_RESET_STIPPLE,
+ (i + 0),
+ (i + 1));
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ if (count >= 2) {
+ flags = DRAW_PIPE_RESET_STIPPLE;
+
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE( flags,
+ (i - 1),
+ (i ));
+ }
+
+ LINE( flags,
+ (i - 1),
+ (0 ));
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE( flags,
+ (i - 1),
+ (i ));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 0; i+2 < count; i += 3) {
+ if (flatfirst) {
+ /* put provoking vertex in last pos for clipper */
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 1),
+ (i + 2),
+ (i + 0 ));
+ }
+ else {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 0),
+ (i + 1),
+ (i + 2 ));
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 1 + (i&1)),
+ (i + 2 - (i&1)),
+ (i + 0) );
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 0 + (i&1)),
+ (i + 1 - (i&1)),
+ (i + 2 ));
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 2),
+ 0,
+ (i + 1) );
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (0),
+ (i + 1),
+ (i + 2 ));
+ }
+ }
+ }
+ break;
+
+
+ case PIPE_PRIM_QUADS:
+ if (flatfirst) {
+ for (i = 0; i+3 < count; i += 4) {
+ QUAD( (i + 1),
+ (i + 2),
+ (i + 3),
+ (i + 0) );
+ }
+ }
+ else {
+ for (i = 0; i+3 < count; i += 4) {
+ QUAD( (i + 0),
+ (i + 1),
+ (i + 2),
+ (i + 3));
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ if (flatfirst) {
+ for (i = 0; i+3 < count; i += 2) {
+ QUAD( (i + 1),
+ (i + 3),
+ (i + 2),
+ (i + 0) );
+ }
+ }
+ else {
+ for (i = 0; i+3 < count; i += 2) {
+ QUAD( (i + 2),
+ (i + 0),
+ (i + 1),
+ (i + 3));
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ {
+ /* These bitflags look a little odd because we submit the
+ * vertices as (1,2,0) to satisfy flatshade requirements.
+ */
+ const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+
+ for (i = 0; i+2 < count; i++, flags = edge_middle) {
+
+ if (i + 3 == count)
+ flags |= edge_last;
+
+ TRIANGLE( flags,
+ (i + 1),
+ (i + 2),
+ (0));
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ FLUSH;
+}
+
+
+#undef TRIANGLE
+#undef QUAD
+#undef POINT
+#undef LINE
+#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
new file mode 100644
index 0000000000..88f4d9f495
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_elts.c
@@ -0,0 +1,89 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "draw/draw_pt.h"
+#include "draw/draw_private.h"
+
+/* Neat get_elt func that also works for varrays drawing by encoding
+ * the start value into a pointer.
+ */
+
+static unsigned elt_uint( const void *elts, unsigned idx )
+{
+ return *(((const uint *)elts) + idx);
+}
+
+static unsigned elt_ushort( const void *elts, unsigned idx )
+{
+ return *(((const ushort *)elts) + idx);
+}
+
+static unsigned elt_ubyte( const void *elts, unsigned idx )
+{
+ return *(((const ubyte *)elts) + idx);
+}
+
+static unsigned elt_vert( const void *elts, unsigned idx )
+{
+ /* unsigned index is packed in the pointer */
+ return (unsigned)(uintptr_t)elts + idx;
+}
+
+pt_elt_func draw_pt_elt_func( struct draw_context *draw )
+{
+ switch (draw->pt.user.eltSize) {
+ case 0: return &elt_vert;
+ case 1: return &elt_ubyte;
+ case 2: return &elt_ushort;
+ case 4: return &elt_uint;
+ default: return NULL;
+ }
+}
+
+const void *draw_pt_elt_ptr( struct draw_context *draw,
+ unsigned start )
+{
+ const char *elts = draw->pt.user.elts;
+
+ switch (draw->pt.user.eltSize) {
+ case 0:
+ return (const void *)(((const ubyte *)NULL) + start);
+ case 1:
+ return (const void *)(((const ubyte *)elts) + start);
+ case 2:
+ return (const void *)(((const ushort *)elts) + start);
+ case 4:
+ return (const void *)(((const uint *)elts) + start);
+ default:
+ return NULL;
+ }
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
new file mode 100644
index 0000000000..064e16c295
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -0,0 +1,313 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+struct pt_emit {
+ struct draw_context *draw;
+
+ struct translate *translate;
+
+ struct translate_cache *cache;
+ unsigned prim;
+
+ const struct vertex_info *vinfo;
+};
+
+void draw_pt_emit_prepare( struct pt_emit *emit,
+ unsigned prim,
+ unsigned *max_vertices )
+{
+ struct draw_context *draw = emit->draw;
+ const struct vertex_info *vinfo;
+ unsigned dst_offset;
+ struct translate_key hw_key;
+ unsigned i;
+ boolean ok;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+
+ /* XXX: may need to defensively reset this later on as clipping can
+ * clobber this state in the render backend.
+ */
+ emit->prim = prim;
+
+ ok = draw->render->set_primitive(draw->render, emit->prim);
+ if (!ok) {
+ assert(0);
+ return;
+ }
+
+ /* Must do this after set_primitive() above:
+ */
+ emit->vinfo = vinfo = draw->render->get_vertex_info(draw->render);
+
+
+ /* Translate from pipeline vertices to hw vertices.
+ */
+ dst_offset = 0;
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ unsigned emit_sz = 0;
+ unsigned src_buffer = 0;
+ unsigned output_format;
+ unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
+
+
+
+ switch (vinfo->attrib[i].emit) {
+ case EMIT_4F:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit_sz = 4 * sizeof(float);
+ break;
+ case EMIT_3F:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ emit_sz = 3 * sizeof(float);
+ break;
+ case EMIT_2F:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ emit_sz = 2 * sizeof(float);
+ break;
+ case EMIT_1F:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ src_buffer = 1;
+ src_offset = 0;
+ break;
+ case EMIT_4UB:
+ output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ emit_sz = 4 * sizeof(ubyte);
+ break;
+ default:
+ assert(0);
+ output_format = PIPE_FORMAT_NONE;
+ emit_sz = 0;
+ break;
+ }
+
+ hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ hw_key.element[i].input_buffer = src_buffer;
+ hw_key.element[i].input_offset = src_offset;
+ hw_key.element[i].output_format = output_format;
+ hw_key.element[i].output_offset = dst_offset;
+
+ dst_offset += emit_sz;
+ }
+
+ hw_key.nr_elements = vinfo->num_attribs;
+ hw_key.output_stride = vinfo->size * 4;
+
+ if (!emit->translate ||
+ translate_key_compare(&emit->translate->key, &hw_key) != 0)
+ {
+ translate_key_sanitize(&hw_key);
+ emit->translate = translate_cache_find(emit->cache, &hw_key);
+ }
+
+ *max_vertices = (draw->render->max_vertex_buffer_bytes /
+ (vinfo->size * 4));
+
+ /* even number */
+ *max_vertices = *max_vertices & ~1;
+}
+
+
+void draw_pt_emit( struct pt_emit *emit,
+ const float (*vertex_data)[4],
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
+ unsigned count )
+{
+ struct draw_context *draw = emit->draw;
+ struct translate *translate = emit->translate;
+ struct vbuf_render *render = draw->render;
+ void *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ if (vertex_count == 0)
+ return;
+
+ if (vertex_count >= UNDEFINED_VERTEX_ID) {
+ assert(0);
+ return;
+ }
+
+ /* XXX: and work out some way to coordinate the render primitive
+ * between vbuf.c and here...
+ */
+ if (!draw->render->set_primitive(draw->render, emit->prim)) {
+ assert(0);
+ return;
+ }
+
+ render->allocate_vertices(render,
+ (ushort)translate->key.output_stride,
+ (ushort)vertex_count);
+
+ hw_verts = render->map_vertices( render );
+ if (!hw_verts) {
+ assert(0);
+ return;
+ }
+
+ translate->set_buffer(translate,
+ 0,
+ vertex_data,
+ stride );
+
+ translate->set_buffer(translate,
+ 1,
+ &draw->rasterizer->point_size,
+ 0);
+
+ translate->run( translate,
+ 0,
+ vertex_count,
+ hw_verts );
+
+ render->unmap_vertices( render,
+ 0,
+ vertex_count - 1 );
+
+ render->draw(render,
+ elts,
+ count);
+
+ render->release_vertices(render);
+}
+
+
+void draw_pt_emit_linear(struct pt_emit *emit,
+ const float (*vertex_data)[4],
+ unsigned stride,
+ unsigned count)
+{
+ struct draw_context *draw = emit->draw;
+ struct translate *translate = emit->translate;
+ struct vbuf_render *render = draw->render;
+ void *hw_verts;
+
+#if 0
+ debug_printf("Linear emit\n");
+#endif
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
+ /* XXX: and work out some way to coordinate the render primitive
+ * between vbuf.c and here...
+ */
+ if (!draw->render->set_primitive(draw->render, emit->prim))
+ goto fail;
+
+ if (!render->allocate_vertices(render,
+ (ushort)translate->key.output_stride,
+ (ushort)count))
+ goto fail;
+
+ hw_verts = render->map_vertices( render );
+ if (!hw_verts)
+ goto fail;
+
+ translate->set_buffer(translate, 0,
+ vertex_data, stride);
+
+ translate->set_buffer(translate, 1,
+ &draw->rasterizer->point_size,
+ 0);
+
+ translate->run(translate,
+ 0,
+ count,
+ hw_verts);
+
+ if (0) {
+ unsigned i;
+ for (i = 0; i < count; i++) {
+ debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i);
+ draw_dump_emitted_vertex( emit->vinfo,
+ (const uint8_t *)hw_verts +
+ translate->key.output_stride * i );
+ }
+ }
+
+ render->unmap_vertices( render, 0, count - 1 );
+
+ render->draw_arrays(render, 0, count);
+
+ render->release_vertices(render);
+
+ return;
+
+fail:
+ assert(0);
+ return;
+}
+
+struct pt_emit *draw_pt_emit_create( struct draw_context *draw )
+{
+ struct pt_emit *emit = CALLOC_STRUCT(pt_emit);
+ if (!emit)
+ return NULL;
+
+ emit->draw = draw;
+ emit->cache = translate_cache_create();
+ if (!emit->cache) {
+ FREE(emit);
+ return NULL;
+ }
+
+ return emit;
+}
+
+void draw_pt_emit_destroy( struct pt_emit *emit )
+{
+ if (emit->cache)
+ translate_cache_destroy(emit->cache);
+
+ FREE(emit);
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
new file mode 100644
index 0000000000..65c3a34c34
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -0,0 +1,234 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+
+struct pt_fetch {
+ struct draw_context *draw;
+
+ struct translate *translate;
+
+ unsigned vertex_size;
+ boolean need_edgeflags;
+
+ struct translate_cache *cache;
+};
+
+/* Perform the fetch from API vertex elements & vertex buffers, to a
+ * contiguous set of float[4] attributes as required for the
+ * vertex_shader->run_linear() method.
+ *
+ * This is used in all cases except pure passthrough
+ * (draw_pt_fetch_emit.c) which has its own version to translate
+ * directly to hw vertices.
+ *
+ */
+void draw_pt_fetch_prepare( struct pt_fetch *fetch,
+ unsigned vs_input_count,
+ unsigned vertex_size )
+{
+ struct draw_context *draw = fetch->draw;
+ unsigned nr_inputs;
+ unsigned i, nr = 0;
+ unsigned dst_offset = 0;
+ struct translate_key key;
+
+ fetch->vertex_size = vertex_size;
+
+ /* Always emit/leave space for a vertex header.
+ *
+ * It's worth considering whether the vertex headers should contain
+ * a pointer to the 'data', rather than having it inline.
+ * Something to look at after we've fully switched over to the pt
+ * paths.
+ */
+ {
+ /* Need to set header->vertex_id = 0xffff somehow.
+ */
+ key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
+ key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
+ key.element[nr].input_offset = 0;
+ key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
+ key.element[nr].output_offset = dst_offset;
+ dst_offset += 1 * sizeof(float);
+ nr++;
+
+
+ /* Just leave the clip[] array untouched.
+ */
+ dst_offset += 4 * sizeof(float);
+ }
+
+ assert( draw->pt.nr_vertex_elements >= vs_input_count );
+
+ nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements );
+
+ for (i = 0; i < nr_inputs; i++) {
+ key.element[nr].input_format = draw->pt.vertex_element[i].src_format;
+ key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index;
+ key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset;
+ key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ key.element[nr].output_offset = dst_offset;
+
+ dst_offset += 4 * sizeof(float);
+ nr++;
+ }
+
+ assert(dst_offset <= vertex_size);
+
+ key.nr_elements = nr;
+ key.output_stride = vertex_size;
+
+
+ if (!fetch->translate ||
+ translate_key_compare(&fetch->translate->key, &key) != 0)
+ {
+ translate_key_sanitize(&key);
+ fetch->translate = translate_cache_find(fetch->cache, &key);
+
+ {
+ static struct vertex_header vh = { 0, 1, 0, UNDEFINED_VERTEX_ID, { .0f, .0f, .0f, .0f } };
+ fetch->translate->set_buffer(fetch->translate,
+ draw->pt.nr_vertex_buffers,
+ &vh,
+ 0);
+ }
+ }
+
+ fetch->need_edgeflags = ((draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) &&
+ draw->pt.user.edgeflag);
+}
+
+
+
+
+void draw_pt_fetch_run( struct pt_fetch *fetch,
+ const unsigned *elts,
+ unsigned count,
+ char *verts )
+{
+ struct draw_context *draw = fetch->draw;
+ struct translate *translate = fetch->translate;
+ unsigned i;
+
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ translate->set_buffer(translate,
+ i,
+ ((char *)draw->pt.user.vbuffer[i] +
+ draw->pt.vertex_buffer[i].buffer_offset),
+ draw->pt.vertex_buffer[i].stride );
+ }
+
+ translate->run_elts( translate,
+ elts,
+ count,
+ verts );
+
+ /* Edgeflags are hard to fit into a translate program, populate
+ * them separately if required. In the setup above they are
+ * defaulted to one, so only need this if there is reason to change
+ * that default:
+ */
+ if (fetch->need_edgeflags) {
+ for (i = 0; i < count; i++) {
+ struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size);
+ vh->edgeflag = draw_pt_get_edgeflag( draw, elts[i] );
+ }
+ }
+}
+
+
+void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
+ unsigned start,
+ unsigned count,
+ char *verts )
+{
+ struct draw_context *draw = fetch->draw;
+ struct translate *translate = fetch->translate;
+ unsigned i;
+
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ translate->set_buffer(translate,
+ i,
+ ((char *)draw->pt.user.vbuffer[i] +
+ draw->pt.vertex_buffer[i].buffer_offset),
+ draw->pt.vertex_buffer[i].stride );
+ }
+
+ translate->run( translate,
+ start,
+ count,
+ verts );
+
+ /* Edgeflags are hard to fit into a translate program, populate
+ * them separately if required. In the setup above they are
+ * defaulted to one, so only need this if there is reason to change
+ * that default:
+ */
+ if (fetch->need_edgeflags) {
+ for (i = 0; i < count; i++) {
+ struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size);
+ vh->edgeflag = draw_pt_get_edgeflag( draw, start + i );
+ }
+ }
+}
+
+
+struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw )
+{
+ struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch);
+ if (!fetch)
+ return NULL;
+
+ fetch->draw = draw;
+ fetch->cache = translate_cache_create();
+ if (!fetch->cache) {
+ FREE(fetch);
+ return NULL;
+ }
+
+ return fetch;
+}
+
+void draw_pt_fetch_destroy( struct pt_fetch *fetch )
+{
+ if (fetch->cache)
+ translate_cache_destroy(fetch->cache);
+
+ FREE(fetch);
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
new file mode 100644
index 0000000000..e7fe6b3b76
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -0,0 +1,437 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+/* The simplest 'middle end' in the new vertex code.
+ *
+ * The responsibilities of a middle end are to:
+ * - perform vertex fetch using
+ * - draw vertex element/buffer state
+ * - a list of fetch indices we received as an input
+ * - run the vertex shader
+ * - cliptest,
+ * - clip coord calculation
+ * - viewport transformation
+ * - if necessary, run the primitive pipeline, passing it:
+ * - a linear array of vertex_header vertices constructed here
+ * - a set of draw indices we received as an input
+ * - otherwise, drive the hw backend,
+ * - allocate space for hardware format vertices
+ * - translate the vertex-shader output vertices to hw format
+ * - calling the backend draw functions.
+ *
+ * For convenience, we provide a helper function to drive the hardware
+ * backend given similar inputs to those required to run the pipeline.
+ *
+ * In the case of passthrough mode, many of these actions are disabled
+ * or noops, so we end up doing:
+ *
+ * - perform vertex fetch
+ * - drive the hw backend
+ *
+ * IE, basically just vertex fetch to post-vs-format vertices,
+ * followed by a call to the backend helper function.
+ */
+
+
+struct fetch_emit_middle_end {
+ struct draw_pt_middle_end base;
+ struct draw_context *draw;
+
+ struct translate *translate;
+ const struct vertex_info *vinfo;
+
+ /* Cache point size somewhere it's address won't change:
+ */
+ float point_size;
+
+ struct translate_cache *cache;
+};
+
+
+
+
+static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
+ unsigned prim,
+ unsigned opt,
+ unsigned *max_vertices )
+{
+ struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+ struct draw_context *draw = feme->draw;
+ const struct vertex_info *vinfo;
+ unsigned i, dst_offset;
+ boolean ok;
+ struct translate_key key;
+
+
+ ok = draw->render->set_primitive( draw->render,
+ prim );
+ if (!ok) {
+ assert(0);
+ return;
+ }
+
+ /* Must do this after set_primitive() above:
+ */
+ vinfo = feme->vinfo = draw->render->get_vertex_info(draw->render);
+
+
+
+ /* Transform from API vertices to HW vertices, skipping the
+ * pipeline_vertex intermediate step.
+ */
+ dst_offset = 0;
+ memset(&key, 0, sizeof(key));
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->attrib[i].src_index];
+
+ unsigned emit_sz = 0;
+ unsigned input_format = src->src_format;
+ unsigned input_buffer = src->vertex_buffer_index;
+ unsigned input_offset = src->src_offset;
+ unsigned output_format;
+
+ switch (vinfo->attrib[i].emit) {
+ case EMIT_4UB:
+ output_format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ emit_sz = 4 * sizeof(unsigned char);
+ break;
+ case EMIT_4F:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit_sz = 4 * sizeof(float);
+ break;
+ case EMIT_3F:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ emit_sz = 3 * sizeof(float);
+ break;
+ case EMIT_2F:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ emit_sz = 2 * sizeof(float);
+ break;
+ case EMIT_1F:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ input_format = PIPE_FORMAT_R32_FLOAT;
+ input_buffer = draw->pt.nr_vertex_buffers;
+ input_offset = 0;
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_OMIT:
+ continue;
+ default:
+ assert(0);
+ output_format = PIPE_FORMAT_NONE;
+ emit_sz = 0;
+ continue;
+ }
+
+ key.element[i].input_format = input_format;
+ key.element[i].input_buffer = input_buffer;
+ key.element[i].input_offset = input_offset;
+ key.element[i].output_format = output_format;
+ key.element[i].output_offset = dst_offset;
+
+ dst_offset += emit_sz;
+ }
+
+ key.nr_elements = vinfo->num_attribs;
+ key.output_stride = vinfo->size * 4;
+
+ /* Don't bother with caching at this stage:
+ */
+ if (!feme->translate ||
+ translate_key_compare(&feme->translate->key, &key) != 0)
+ {
+ translate_key_sanitize(&key);
+ feme->translate = translate_cache_find(feme->cache,
+ &key);
+
+
+ feme->translate->set_buffer(feme->translate,
+ draw->pt.nr_vertex_buffers,
+ &feme->point_size,
+ 0);
+ }
+
+ feme->point_size = draw->rasterizer->point_size;
+
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ feme->translate->set_buffer(feme->translate,
+ i,
+ ((char *)draw->pt.user.vbuffer[i] +
+ draw->pt.vertex_buffer[i].buffer_offset),
+ draw->pt.vertex_buffer[i].stride );
+ }
+
+ *max_vertices = (draw->render->max_vertex_buffer_bytes /
+ (vinfo->size * 4));
+
+ /* Return an even number of verts.
+ * This prevents "parity" errors when splitting long triangle strips which
+ * can lead to front/back culling mix-ups.
+ * Every other triangle in a strip has an alternate front/back orientation
+ * so splitting at an odd position can cause the orientation of subsequent
+ * triangles to get reversed.
+ */
+ *max_vertices = *max_vertices & ~1;
+}
+
+
+
+
+
+static void fetch_emit_run( struct draw_pt_middle_end *middle,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+ struct draw_context *draw = feme->draw;
+ void *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ if (fetch_count >= UNDEFINED_VERTEX_ID) {
+ assert(0);
+ return;
+ }
+
+ draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)fetch_count );
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts) {
+ assert(0);
+ return;
+ }
+
+
+ /* Single routine to fetch vertices and emit HW verts.
+ */
+ feme->translate->run_elts( feme->translate,
+ fetch_elts,
+ fetch_count,
+ hw_verts );
+
+ if (0) {
+ unsigned i;
+ for (i = 0; i < fetch_count; i++) {
+ debug_printf("\n\nvertex %d:\n", i);
+ draw_dump_emitted_vertex( feme->vinfo,
+ (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i );
+ }
+ }
+
+ draw->render->unmap_vertices( draw->render,
+ 0,
+ (ushort)(fetch_count - 1) );
+
+ /* XXX: Draw arrays path to avoid re-emitting index list again and
+ * again.
+ */
+ draw->render->draw( draw->render,
+ draw_elts,
+ draw_count );
+
+ /* Done -- that was easy, wasn't it:
+ */
+ draw->render->release_vertices( draw->render );
+
+}
+
+
+static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count )
+{
+ struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+ struct draw_context *draw = feme->draw;
+ void *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
+
+ /* Single routine to fetch vertices and emit HW verts.
+ */
+ feme->translate->run( feme->translate,
+ start,
+ count,
+ hw_verts );
+
+ if (0) {
+ unsigned i;
+ for (i = 0; i < count; i++) {
+ debug_printf("\n\nvertex %d:\n", i);
+ draw_dump_emitted_vertex( feme->vinfo,
+ (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i );
+ }
+ }
+
+ draw->render->unmap_vertices( draw->render, 0, count - 1 );
+
+ /* XXX: Draw arrays path to avoid re-emitting index list again and
+ * again.
+ */
+ draw->render->draw_arrays( draw->render, 0, count );
+
+ /* Done -- that was easy, wasn't it:
+ */
+ draw->render->release_vertices( draw->render );
+ return;
+
+fail:
+ assert(0);
+ return;
+}
+
+
+static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+ struct draw_context *draw = feme->draw;
+ void *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ if (count >= UNDEFINED_VERTEX_ID)
+ return FALSE;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)count ))
+ return FALSE;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ return FALSE;
+
+ /* Single routine to fetch vertices and emit HW verts.
+ */
+ feme->translate->run( feme->translate,
+ start,
+ count,
+ hw_verts );
+
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+
+ /* XXX: Draw arrays path to avoid re-emitting index list again and
+ * again.
+ */
+ draw->render->draw( draw->render,
+ draw_elts,
+ draw_count );
+
+ /* Done -- that was easy, wasn't it:
+ */
+ draw->render->release_vertices( draw->render );
+
+ return TRUE;
+}
+
+
+
+
+static void fetch_emit_finish( struct draw_pt_middle_end *middle )
+{
+ /* nothing to do */
+}
+
+static void fetch_emit_destroy( struct draw_pt_middle_end *middle )
+{
+ struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+
+ if (feme->cache)
+ translate_cache_destroy(feme->cache);
+
+ FREE(middle);
+}
+
+
+struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw )
+{
+ struct fetch_emit_middle_end *fetch_emit = CALLOC_STRUCT( fetch_emit_middle_end );
+ if (fetch_emit == NULL)
+ return NULL;
+
+ fetch_emit->cache = translate_cache_create();
+ if (!fetch_emit->cache) {
+ FREE(fetch_emit);
+ return NULL;
+ }
+
+ fetch_emit->base.prepare = fetch_emit_prepare;
+ fetch_emit->base.run = fetch_emit_run;
+ fetch_emit->base.run_linear = fetch_emit_run_linear;
+ fetch_emit->base.run_linear_elts = fetch_emit_run_linear_elts;
+ fetch_emit->base.finish = fetch_emit_finish;
+ fetch_emit->base.destroy = fetch_emit_destroy;
+
+ fetch_emit->draw = draw;
+
+ return &fetch_emit->base;
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
new file mode 100644
index 0000000000..44147aed9b
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -0,0 +1,429 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+
+#include "translate/translate.h"
+
+struct fetch_shade_emit;
+
+
+/* Prototype fetch, shade, emit-hw-verts all in one go.
+ */
+struct fetch_shade_emit {
+ struct draw_pt_middle_end base;
+ struct draw_context *draw;
+
+
+ /* Temporaries:
+ */
+ const float *constants;
+ unsigned pitch[PIPE_MAX_ATTRIBS];
+ const ubyte *src[PIPE_MAX_ATTRIBS];
+ unsigned prim;
+
+ struct draw_vs_varient_key key;
+ struct draw_vs_varient *active;
+
+
+ const struct vertex_info *vinfo;
+};
+
+
+
+
+static void fse_prepare( struct draw_pt_middle_end *middle,
+ unsigned prim,
+ unsigned opt,
+ unsigned *max_vertices )
+{
+ struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+ struct draw_context *draw = fse->draw;
+ unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs;
+ const struct vertex_info *vinfo;
+ unsigned i;
+ unsigned nr_vbs = 0;
+
+
+ if (!draw->render->set_primitive( draw->render,
+ prim )) {
+ assert(0);
+ return;
+ }
+
+ /* Must do this after set_primitive() above:
+ */
+ fse->vinfo = vinfo = draw->render->get_vertex_info(draw->render);
+
+
+
+ fse->key.output_stride = vinfo->size * 4;
+ fse->key.nr_outputs = vinfo->num_attribs;
+ fse->key.nr_inputs = num_vs_inputs;
+
+ fse->key.nr_elements = MAX2(fse->key.nr_outputs, /* outputs - translate to hw format */
+ fse->key.nr_inputs); /* inputs - fetch from api format */
+
+ fse->key.viewport = (!draw->rasterizer->bypass_vs_clip_and_viewport &&
+ !draw->identity_viewport);
+ fse->key.clip = !draw->bypass_clipping;
+ fse->key.const_vbuffers = 0;
+
+ memset(fse->key.element, 0,
+ fse->key.nr_elements * sizeof(fse->key.element[0]));
+
+ for (i = 0; i < num_vs_inputs; i++) {
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
+ fse->key.element[i].in.format = src->src_format;
+
+ /* Consider ignoring these, ie make generated programs
+ * independent of this state:
+ */
+ fse->key.element[i].in.buffer = src->vertex_buffer_index;
+ fse->key.element[i].in.offset = src->src_offset;
+ nr_vbs = MAX2(nr_vbs, src->vertex_buffer_index + 1);
+ }
+
+ for (i = 0; i < 5 && i < nr_vbs; i++) {
+ if (draw->pt.vertex_buffer[i].stride == 0)
+ fse->key.const_vbuffers |= (1<<i);
+ }
+
+ if (0) debug_printf("%s: lookup const_vbuffers: %x\n", __FUNCTION__, fse->key.const_vbuffers);
+
+ {
+ unsigned dst_offset = 0;
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ unsigned emit_sz = 0;
+
+ switch (vinfo->attrib[i].emit) {
+ case EMIT_4F:
+ emit_sz = 4 * sizeof(float);
+ break;
+ case EMIT_3F:
+ emit_sz = 3 * sizeof(float);
+ break;
+ case EMIT_2F:
+ emit_sz = 2 * sizeof(float);
+ break;
+ case EMIT_1F:
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_4UB:
+ emit_sz = 4 * sizeof(ubyte);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* The elements in the key correspond to vertex shader output
+ * numbers, not to positions in the hw vertex description --
+ * that's handled by the output_offset field.
+ */
+ fse->key.element[i].out.format = vinfo->attrib[i].emit;
+ fse->key.element[i].out.vs_output = vinfo->attrib[i].src_index;
+ fse->key.element[i].out.offset = dst_offset;
+
+ dst_offset += emit_sz;
+ assert(fse->key.output_stride >= dst_offset);
+ }
+ }
+
+
+ fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader,
+ &fse->key );
+
+ if (!fse->active) {
+ assert(0);
+ return ;
+ }
+
+ if (0) debug_printf("%s: found const_vbuffers: %x\n", __FUNCTION__,
+ fse->active->key.const_vbuffers);
+
+ /* Now set buffer pointers:
+ */
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ fse->active->set_buffer( fse->active,
+ i,
+ ((const ubyte *) draw->pt.user.vbuffer[i] +
+ draw->pt.vertex_buffer[i].buffer_offset),
+ draw->pt.vertex_buffer[i].stride );
+ }
+
+ *max_vertices = (draw->render->max_vertex_buffer_bytes /
+ (vinfo->size * 4));
+
+ /* Return an even number of verts.
+ * This prevents "parity" errors when splitting long triangle strips which
+ * can lead to front/back culling mix-ups.
+ * Every other triangle in a strip has an alternate front/back orientation
+ * so splitting at an odd position can cause the orientation of subsequent
+ * triangles to get reversed.
+ */
+ *max_vertices = *max_vertices & ~1;
+
+ /* Probably need to do this somewhere (or fix exec shader not to
+ * need it):
+ */
+ if (1) {
+ struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ vs->prepare(vs, draw);
+ }
+
+
+ //return TRUE;
+}
+
+
+
+
+
+
+
+static void fse_run_linear( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count )
+{
+ struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+ struct draw_context *draw = fse->draw;
+ char *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
+
+ /* Single routine to fetch vertices, run shader and emit HW verts.
+ * Clipping is done elsewhere -- either by the API or on hardware,
+ * or for some other reason not required...
+ */
+ fse->active->run_linear( fse->active,
+ start, count,
+ hw_verts );
+
+
+ if (0) {
+ unsigned i;
+ for (i = 0; i < count; i++) {
+ debug_printf("\n\n%s vertex %d: (stride %d, offset %d)\n", __FUNCTION__, i,
+ fse->key.output_stride,
+ fse->key.output_stride * i);
+
+ draw_dump_emitted_vertex( fse->vinfo,
+ (const uint8_t *)hw_verts + fse->key.output_stride * i );
+ }
+ }
+
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+
+ /* Draw arrays path to avoid re-emitting index list again and
+ * again.
+ */
+ draw->render->draw_arrays( draw->render,
+ 0,
+ count );
+
+
+ draw->render->release_vertices( draw->render );
+
+ return;
+
+fail:
+ assert(0);
+ return;
+}
+
+
+static void
+fse_run(struct draw_pt_middle_end *middle,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+ struct draw_context *draw = fse->draw;
+ void *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ if (fetch_count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)fetch_count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
+
+
+ /* Single routine to fetch vertices, run shader and emit HW verts.
+ */
+ fse->active->run_elts( fse->active,
+ fetch_elts,
+ fetch_count,
+ hw_verts );
+
+
+ if (0) {
+ unsigned i;
+ for (i = 0; i < fetch_count; i++) {
+ debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i);
+ draw_dump_emitted_vertex( fse->vinfo,
+ (const uint8_t *)hw_verts +
+ fse->key.output_stride * i );
+ }
+ }
+
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(fetch_count - 1) );
+
+ draw->render->draw( draw->render,
+ draw_elts,
+ draw_count );
+
+
+ draw->render->release_vertices( draw->render );
+ return;
+
+fail:
+ assert(0);
+ return;
+}
+
+
+
+static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+ struct draw_context *draw = fse->draw;
+ char *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ if (count >= UNDEFINED_VERTEX_ID)
+ return FALSE;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)count ))
+ return FALSE;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ return FALSE;
+
+ /* Single routine to fetch vertices, run shader and emit HW verts.
+ * Clipping is done elsewhere -- either by the API or on hardware,
+ * or for some other reason not required...
+ */
+ fse->active->run_linear( fse->active,
+ start, count,
+ hw_verts );
+
+
+ draw->render->draw( draw->render,
+ draw_elts,
+ draw_count );
+
+
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+
+ draw->render->release_vertices( draw->render );
+
+ return TRUE;
+}
+
+
+
+static void fse_finish( struct draw_pt_middle_end *middle )
+{
+}
+
+
+static void
+fse_destroy( struct draw_pt_middle_end *middle )
+{
+ FREE(middle);
+}
+
+struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw )
+{
+ struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit);
+ if (!fse)
+ return NULL;
+
+ fse->base.prepare = fse_prepare;
+ fse->base.run = fse_run;
+ fse->base.run_linear = fse_run_linear;
+ fse->base.run_linear_elts = fse_run_linear_elts;
+ fse->base.finish = fse_finish;
+ fse->base.destroy = fse_destroy;
+ fse->draw = draw;
+
+ return &fse->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
new file mode 100644
index 0000000000..df6c265b7e
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -0,0 +1,395 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+#include "translate/translate.h"
+
+
+struct fetch_pipeline_middle_end {
+ struct draw_pt_middle_end base;
+ struct draw_context *draw;
+
+ struct pt_emit *emit;
+ struct pt_fetch *fetch;
+ struct pt_post_vs *post_vs;
+
+ unsigned vertex_data_offset;
+ unsigned vertex_size;
+ unsigned prim;
+ unsigned opt;
+};
+
+
+static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
+ unsigned prim,
+ unsigned opt,
+ unsigned *max_vertices )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+
+ /* Add one to num_outputs because the pipeline occasionally tags on
+ * an additional texcoord, eg for AA lines.
+ */
+ unsigned nr = MAX2( vs->info.num_inputs,
+ vs->info.num_outputs + 1 );
+
+ fpme->prim = prim;
+ fpme->opt = opt;
+
+ /* Always leave room for the vertex header whether we need it or
+ * not. It's hard to get rid of it in particular because of the
+ * viewport code in draw_pt_post_vs.c.
+ */
+ fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
+
+
+
+ draw_pt_fetch_prepare( fpme->fetch,
+ vs->info.num_inputs,
+ fpme->vertex_size );
+ /* XXX: it's not really gl rasterization rules we care about here,
+ * but gl vs dx9 clip spaces.
+ */
+ draw_pt_post_vs_prepare( fpme->post_vs,
+ (boolean)draw->bypass_clipping,
+ (boolean)(draw->identity_viewport ||
+ draw->rasterizer->bypass_vs_clip_and_viewport),
+ (boolean)draw->rasterizer->gl_rasterization_rules );
+
+
+ if (!(opt & PT_PIPELINE)) {
+ draw_pt_emit_prepare( fpme->emit,
+ prim,
+ max_vertices );
+
+ *max_vertices = MAX2( *max_vertices,
+ DRAW_PIPE_MAX_VERTICES );
+ }
+ else {
+ *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ }
+
+ /* return even number */
+ *max_vertices = *max_vertices & ~1;
+
+ /* No need to prepare the shader.
+ */
+ vs->prepare(vs, draw);
+}
+
+
+
+static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align( fetch_count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts) {
+ /* Not much we can do here - just skip the rendering.
+ */
+ assert(0);
+ return;
+ }
+
+ /* Fetch into our vertex buffer
+ */
+ draw_pt_fetch_run( fpme->fetch,
+ fetch_elts,
+ fetch_count,
+ (char *)pipeline_verts );
+
+ /* Run the shader, note that this overwrites the data[] parts of
+ * the pipeline verts. If there is no shader, eg if
+ * bypass_vs_clip_and_viewport, then the inputs == outputs, and are
+ * already in the correct place.
+ */
+ if (opt & PT_SHADE)
+ {
+ shader->run_linear(shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.constants,
+ fetch_count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ }
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+ else {
+ draw_pt_emit( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ fetch_count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+
+
+ FREE(pipeline_verts);
+}
+
+
+static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count)
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align( count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts) {
+ /* Not much we can do here - just skip the rendering.
+ */
+ assert(0);
+ return;
+ }
+
+ /* Fetch into our vertex buffer
+ */
+ draw_pt_fetch_run_linear( fpme->fetch,
+ start,
+ count,
+ (char *)pipeline_verts );
+
+ /* Run the shader, note that this overwrites the data[] parts of
+ * the pipeline verts. If there is no shader, ie if
+ * bypass_vs_clip_and_viewport, then the inputs == outputs, and are
+ * already in the correct place.
+ */
+ if (opt & PT_SHADE)
+ {
+ shader->run_linear(shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.constants,
+ count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ }
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run_linear( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ count,
+ fpme->vertex_size);
+ }
+ else {
+ draw_pt_emit_linear( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ fpme->vertex_size,
+ count );
+ }
+
+ FREE(pipeline_verts);
+}
+
+
+
+static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align( count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts)
+ return FALSE;
+
+ /* Fetch into our vertex buffer
+ */
+ draw_pt_fetch_run_linear( fpme->fetch,
+ start,
+ count,
+ (char *)pipeline_verts );
+
+ /* Run the shader, note that this overwrites the data[] parts of
+ * the pipeline verts. If there is no shader, ie if
+ * bypass_vs_clip_and_viewport, then the inputs == outputs, and are
+ * already in the correct place.
+ */
+ if (opt & PT_SHADE)
+ {
+ shader->run_linear(shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.constants,
+ count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ }
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+ else {
+ draw_pt_emit( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+
+ FREE(pipeline_verts);
+ return TRUE;
+}
+
+
+
+static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
+{
+ /* nothing to do */
+}
+
+static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+
+ if (fpme->fetch)
+ draw_pt_fetch_destroy( fpme->fetch );
+
+ if (fpme->emit)
+ draw_pt_emit_destroy( fpme->emit );
+
+ if (fpme->post_vs)
+ draw_pt_post_vs_destroy( fpme->post_vs );
+
+ FREE(middle);
+}
+
+
+struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *draw )
+{
+ struct fetch_pipeline_middle_end *fpme = CALLOC_STRUCT( fetch_pipeline_middle_end );
+ if (!fpme)
+ goto fail;
+
+ fpme->base.prepare = fetch_pipeline_prepare;
+ fpme->base.run = fetch_pipeline_run;
+ fpme->base.run_linear = fetch_pipeline_linear_run;
+ fpme->base.run_linear_elts = fetch_pipeline_linear_run_elts;
+ fpme->base.finish = fetch_pipeline_finish;
+ fpme->base.destroy = fetch_pipeline_destroy;
+
+ fpme->draw = draw;
+
+ fpme->fetch = draw_pt_fetch_create( draw );
+ if (!fpme->fetch)
+ goto fail;
+
+ fpme->post_vs = draw_pt_post_vs_create( draw );
+ if (!fpme->post_vs)
+ goto fail;
+
+ fpme->emit = draw_pt_emit_create( draw );
+ if (!fpme->emit)
+ goto fail;
+
+ return &fpme->base;
+
+ fail:
+ if (fpme)
+ fetch_pipeline_destroy( &fpme->base );
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
new file mode 100644
index 0000000000..00d7197b13
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -0,0 +1,233 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+#include "pipe/p_context.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+
+struct pt_post_vs {
+ struct draw_context *draw;
+
+ boolean (*run)( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride );
+};
+
+
+
+static INLINE float
+dot4(const float *a, const float *b)
+{
+ return (a[0]*b[0] +
+ a[1]*b[1] +
+ a[2]*b[2] +
+ a[3]*b[3]);
+}
+
+
+
+static INLINE unsigned
+compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr)
+{
+ unsigned mask = 0x0;
+ unsigned i;
+
+#if 0
+ debug_printf("compute clipmask %f %f %f %f\n",
+ clip[0], clip[1], clip[2], clip[3]);
+ assert(clip[3] != 0.0);
+#endif
+
+ /* Do the hardwired planes first:
+ */
+ if (-clip[0] + clip[3] < 0) mask |= (1<<0);
+ if ( clip[0] + clip[3] < 0) mask |= (1<<1);
+ if (-clip[1] + clip[3] < 0) mask |= (1<<2);
+ if ( clip[1] + clip[3] < 0) mask |= (1<<3);
+ if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */
+ if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */
+
+ /* Followed by any remaining ones:
+ */
+ for (i = 6; i < nr; i++) {
+ if (dot4(clip, plane[i]) < 0)
+ mask |= (1<<i);
+ }
+
+ return mask;
+}
+
+
+/* The normal case - cliptest, rhw divide, viewport transform.
+ *
+ * Also handle identity viewport here at the expense of a few wasted
+ * instructions
+ */
+static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ struct vertex_header *out = vertices;
+ const float *scale = pvs->draw->viewport.scale;
+ const float *trans = pvs->draw->viewport.translate;
+ const unsigned pos = pvs->draw->vs.position_output;
+ unsigned clipped = 0;
+ unsigned j;
+
+ if (0) debug_printf("%s\n");
+
+ for (j = 0; j < count; j++) {
+ float *position = out->data[pos];
+
+ out->clip[0] = position[0];
+ out->clip[1] = position[1];
+ out->clip[2] = position[2];
+ out->clip[3] = position[3];
+
+ out->vertex_id = 0xffff;
+ out->clipmask = compute_clipmask_gl(out->clip,
+ pvs->draw->plane,
+ pvs->draw->nr_planes);
+ clipped += out->clipmask;
+
+ if (out->clipmask == 0)
+ {
+ /* divide by w */
+ float w = 1.0f / position[3];
+
+ /* Viewport mapping */
+ position[0] = position[0] * w * scale[0] + trans[0];
+ position[1] = position[1] * w * scale[1] + trans[1];
+ position[2] = position[2] * w * scale[2] + trans[2];
+ position[3] = w;
+#if 0
+ debug_printf("post viewport: %f %f %f %f\n",
+ position[0],
+ position[1],
+ position[2],
+ position[3]);
+#endif
+ }
+
+ out = (struct vertex_header *)( (char *)out + stride );
+ }
+
+ return clipped != 0;
+}
+
+
+
+/* If bypass_clipping is set, skip cliptest and rhw divide.
+ */
+static boolean post_vs_viewport( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ struct vertex_header *out = vertices;
+ const float *scale = pvs->draw->viewport.scale;
+ const float *trans = pvs->draw->viewport.translate;
+ const unsigned pos = pvs->draw->vs.position_output;
+ unsigned j;
+
+ if (0) debug_printf("%s\n", __FUNCTION__);
+ for (j = 0; j < count; j++) {
+ float *position = out->data[pos];
+
+ /* Viewport mapping only, no cliptest/rhw divide
+ */
+ position[0] = position[0] * scale[0] + trans[0];
+ position[1] = position[1] * scale[1] + trans[1];
+ position[2] = position[2] * scale[2] + trans[2];
+
+ out = (struct vertex_header *)((char *)out + stride);
+ }
+
+ return FALSE;
+}
+
+
+/* If bypass_clipping is set and we have an identity viewport, nothing
+ * to do.
+ */
+static boolean post_vs_none( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ if (0) debug_printf("%s\n", __FUNCTION__);
+ return FALSE;
+}
+
+boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
+ struct vertex_header *pipeline_verts,
+ unsigned count,
+ unsigned stride )
+{
+ return pvs->run( pvs, pipeline_verts, count, stride );
+}
+
+
+void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
+ boolean bypass_clipping,
+ boolean bypass_viewport,
+ boolean opengl )
+{
+ if (bypass_clipping) {
+ if (bypass_viewport)
+ pvs->run = post_vs_none;
+ else
+ pvs->run = post_vs_viewport;
+ }
+ else {
+ //if (opengl)
+ pvs->run = post_vs_cliptest_viewport_gl;
+ }
+}
+
+
+struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw )
+{
+ struct pt_post_vs *pvs = CALLOC_STRUCT( pt_post_vs );
+ if (!pvs)
+ return NULL;
+
+ pvs->draw = draw;
+
+ return pvs;
+}
+
+void draw_pt_post_vs_destroy( struct pt_post_vs *pvs )
+{
+ FREE(pvs);
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
new file mode 100644
index 0000000000..b61fa29143
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -0,0 +1,77 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
+{
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ *first = 1;
+ *incr = 1;
+ break;
+ case PIPE_PRIM_LINES:
+ *first = 2;
+ *incr = 2;
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_LINE_LOOP:
+ *first = 2;
+ *incr = 1;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ *first = 3;
+ *incr = 3;
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ *first = 3;
+ *incr = 1;
+ break;
+ case PIPE_PRIM_QUADS:
+ *first = 4;
+ *incr = 4;
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ *first = 4;
+ *incr = 2;
+ break;
+ default:
+ assert(0);
+ *first = 0;
+ *incr = 1; /* set to one so that count % incr works */
+ break;
+ }
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
new file mode 100644
index 0000000000..d0e16c9bc3
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -0,0 +1,193 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+#define FETCH_MAX 256
+#define DRAW_MAX (FETCH_MAX+8)
+
+struct varray_frontend {
+ struct draw_pt_front_end base;
+ struct draw_context *draw;
+
+ ushort draw_elts[DRAW_MAX];
+ unsigned fetch_elts[FETCH_MAX];
+
+ unsigned driver_fetch_max;
+ unsigned fetch_max;
+
+ struct draw_pt_middle_end *middle;
+
+ unsigned input_prim;
+ unsigned output_prim;
+};
+
+
+static void varray_flush_linear(struct varray_frontend *varray,
+ unsigned start, unsigned count)
+{
+ if (count) {
+ assert(varray->middle->run_linear);
+ varray->middle->run_linear(varray->middle, start, count);
+ }
+}
+
+static void varray_line_loop_segment(struct varray_frontend *varray,
+ unsigned start,
+ unsigned segment_start,
+ unsigned segment_count,
+ boolean end )
+{
+ assert(segment_count < varray->fetch_max);
+ if (segment_count >= 1) {
+ unsigned nr = 0, i;
+
+ for (i = 0; i < segment_count; i++)
+ varray->fetch_elts[nr++] = start + segment_start + i;
+
+ if (end)
+ varray->fetch_elts[nr++] = start;
+
+ assert(nr <= FETCH_MAX);
+
+ varray->middle->run(varray->middle,
+ varray->fetch_elts,
+ nr,
+ varray->draw_elts, /* ie. linear */
+ nr);
+ }
+}
+
+
+
+static void varray_fan_segment(struct varray_frontend *varray,
+ unsigned start,
+ unsigned segment_start,
+ unsigned segment_count )
+{
+ assert(segment_count < varray->fetch_max);
+ if (segment_count >= 2) {
+ unsigned nr = 0, i;
+
+ if (segment_start != 0)
+ varray->fetch_elts[nr++] = start;
+
+ for (i = 0 ; i < segment_count; i++)
+ varray->fetch_elts[nr++] = start + segment_start + i;
+
+ assert(nr <= FETCH_MAX);
+
+ varray->middle->run(varray->middle,
+ varray->fetch_elts,
+ nr,
+ varray->draw_elts, /* ie. linear */
+ nr);
+ }
+}
+
+
+
+
+#define FUNC varray_run
+#include "draw_pt_varray_tmp_linear.h"
+
+static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = {
+ PIPE_PRIM_POINTS,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */
+ PIPE_PRIM_LINE_STRIP,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLE_STRIP,
+ PIPE_PRIM_TRIANGLE_FAN,
+ PIPE_PRIM_QUADS,
+ PIPE_PRIM_QUAD_STRIP,
+ PIPE_PRIM_POLYGON
+};
+
+
+
+static void varray_prepare(struct draw_pt_front_end *frontend,
+ unsigned prim,
+ struct draw_pt_middle_end *middle,
+ unsigned opt)
+{
+ struct varray_frontend *varray = (struct varray_frontend *)frontend;
+
+ varray->base.run = varray_run;
+
+ varray->input_prim = prim;
+ varray->output_prim = decompose_prim[prim];
+
+ varray->middle = middle;
+ middle->prepare(middle, varray->output_prim, opt, &varray->driver_fetch_max );
+
+ /* check that the max is even */
+ assert((varray->driver_fetch_max & 1) == 0);
+
+ varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max);
+}
+
+
+
+
+static void varray_finish(struct draw_pt_front_end *frontend)
+{
+ struct varray_frontend *varray = (struct varray_frontend *)frontend;
+ varray->middle->finish(varray->middle);
+ varray->middle = NULL;
+}
+
+static void varray_destroy(struct draw_pt_front_end *frontend)
+{
+ FREE(frontend);
+}
+
+
+struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
+{
+ ushort i;
+ struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend);
+ if (varray == NULL)
+ return NULL;
+
+ varray->base.prepare = varray_prepare;
+ varray->base.run = NULL;
+ varray->base.finish = varray_finish;
+ varray->base.destroy = varray_destroy;
+ varray->draw = draw;
+
+ for (i = 0; i < DRAW_MAX; i++) {
+ varray->draw_elts[i] = i;
+ }
+
+ return &varray->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
new file mode 100644
index 0000000000..7c722457c3
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -0,0 +1,238 @@
+
+static void FUNC(struct draw_pt_front_end *frontend,
+ pt_elt_func get_elt,
+ const void *elts,
+ unsigned count)
+{
+ struct varray_frontend *varray = (struct varray_frontend *)frontend;
+ struct draw_context *draw = varray->draw;
+ unsigned start = (unsigned)elts;
+
+ boolean flatfirst = (draw->rasterizer->flatshade &&
+ draw->rasterizer->flatshade_first);
+ unsigned i, j;
+ ushort flags;
+ unsigned first, incr;
+
+ varray->fetch_start = start;
+
+ draw_pt_split_prim(varray->input_prim, &first, &incr);
+
+#if 0
+ debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
+ varray->input_prim,
+ start, count);
+#endif
+
+ switch (varray->input_prim) {
+ case PIPE_PRIM_POINTS:
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i < end; i++) {
+ POINT(varray, i + 0);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+1 < end; i += 2) {
+ LINE(varray, DRAW_PIPE_RESET_STIPPLE,
+ i + 0, i + 1);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ if (count >= 2) {
+ flags = DRAW_PIPE_RESET_STIPPLE;
+
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 1; i < end; i++, flags = 0) {
+ LINE(varray, flags, i - 1, i);
+ }
+ LINE(varray, flags, i - 1, 0);
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 1; i < end; i++, flags = 0) {
+ LINE(varray, flags, i - 1, i);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i += 3) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0, i + 1, i + 2);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (flatfirst) {
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0, i + 1 + (i&1), i + 2 - (i&1));
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
+ }
+ }
+ else {
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i + 2 < end; i++) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0 + (i&1), i + 1 - (i&1), i + 2);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ if (flatfirst) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ }
+ else {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, flags, 0, i + 1, i + 2);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUADS:
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+3 < end; i += 4) {
+ QUAD(varray, i + 0, i + 1, i + 2, i + 3);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+3 < end; i += 2) {
+ QUAD(varray, i + 2, i + 0, i + 1, i + 3);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ {
+ /* These bitflags look a little odd because we submit the
+ * vertices as (1,2,0) to satisfy flatshade requirements.
+ */
+ const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++, flags = edge_middle) {
+
+ if (i + 3 == count)
+ flags |= edge_last;
+
+ TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ varray_flush(varray);
+}
+
+#undef TRIANGLE
+#undef QUAD
+#undef POINT
+#undef LINE
+#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
new file mode 100644
index 0000000000..010c7a18a7
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -0,0 +1,91 @@
+static unsigned trim( unsigned count, unsigned first, unsigned incr )
+{
+ return count - (count - first) % incr;
+}
+
+static void FUNC(struct draw_pt_front_end *frontend,
+ pt_elt_func get_elt,
+ const void *elts,
+ unsigned count)
+{
+ struct varray_frontend *varray = (struct varray_frontend *)frontend;
+ unsigned start = (unsigned) ((char *) elts - (char *) NULL);
+
+ unsigned j;
+ unsigned first, incr;
+
+ draw_pt_split_prim(varray->input_prim, &first, &incr);
+
+ /* Sanitize primitive length:
+ */
+ count = trim(count, first, incr);
+ if (count < first)
+ return;
+
+#if 0
+ debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
+ varray->input_prim,
+ start, count);
+#endif
+
+ switch (varray->input_prim) {
+ case PIPE_PRIM_POINTS:
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ for (j = 0; j < count;) {
+ unsigned remaining = count - j;
+ unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
+ varray_flush_linear(varray, start + j, nr);
+ j += nr;
+ if (nr != remaining)
+ j -= (first - incr);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ /* Always have to decompose as we've stated that this will be
+ * emitted as a line-strip.
+ */
+ for (j = 0; j < count;) {
+ unsigned remaining = count - j;
+ unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
+ varray_line_loop_segment(varray, start, j, nr, nr == remaining);
+ j += nr;
+ if (nr != remaining)
+ j -= (first - incr);
+ }
+ break;
+
+
+ case PIPE_PRIM_POLYGON:
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count < varray->driver_fetch_max) {
+ varray_flush_linear(varray, start, count);
+ }
+ else {
+ for ( j = 0; j < count;) {
+ unsigned remaining = count - j;
+ unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
+ varray_fan_segment(varray, start, j, nr);
+ j += nr;
+ if (nr != remaining)
+ j -= (first - incr);
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+}
+
+#undef TRIANGLE
+#undef QUAD
+#undef POINT
+#undef LINE
+#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
new file mode 100644
index 0000000000..1a0527be63
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -0,0 +1,516 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+
+#define CACHE_MAX 256
+#define FETCH_MAX 256
+#define DRAW_MAX (16*1024)
+
+struct vcache_frontend {
+ struct draw_pt_front_end base;
+ struct draw_context *draw;
+
+ unsigned in[CACHE_MAX];
+ ushort out[CACHE_MAX];
+
+ ushort draw_elts[DRAW_MAX];
+ unsigned fetch_elts[FETCH_MAX];
+
+ unsigned draw_count;
+ unsigned fetch_count;
+ unsigned fetch_max;
+
+ struct draw_pt_middle_end *middle;
+
+ unsigned input_prim;
+ unsigned output_prim;
+
+ unsigned middle_prim;
+ unsigned opt;
+};
+
+static INLINE void
+vcache_flush( struct vcache_frontend *vcache )
+{
+ if (vcache->middle_prim != vcache->output_prim) {
+ vcache->middle_prim = vcache->output_prim;
+ vcache->middle->prepare( vcache->middle,
+ vcache->middle_prim,
+ vcache->opt,
+ &vcache->fetch_max );
+ }
+
+ if (vcache->draw_count) {
+ vcache->middle->run( vcache->middle,
+ vcache->fetch_elts,
+ vcache->fetch_count,
+ vcache->draw_elts,
+ vcache->draw_count );
+ }
+
+ memset(vcache->in, ~0, sizeof(vcache->in));
+ vcache->fetch_count = 0;
+ vcache->draw_count = 0;
+}
+
+static INLINE void
+vcache_check_flush( struct vcache_frontend *vcache )
+{
+ if ( vcache->draw_count + 6 >= DRAW_MAX ||
+ vcache->fetch_count + 4 >= FETCH_MAX )
+ {
+ vcache_flush( vcache );
+ }
+}
+
+
+static INLINE void
+vcache_elt( struct vcache_frontend *vcache,
+ unsigned felt,
+ ushort flags )
+{
+ unsigned idx = felt % CACHE_MAX;
+
+ if (vcache->in[idx] != felt) {
+ assert(vcache->fetch_count < FETCH_MAX);
+
+ vcache->in[idx] = felt;
+ vcache->out[idx] = (ushort)vcache->fetch_count;
+ vcache->fetch_elts[vcache->fetch_count++] = felt;
+ }
+
+ vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags;
+}
+
+
+
+static INLINE void
+vcache_triangle( struct vcache_frontend *vcache,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2 )
+{
+ vcache_elt(vcache, i0, 0);
+ vcache_elt(vcache, i1, 0);
+ vcache_elt(vcache, i2, 0);
+ vcache_check_flush(vcache);
+}
+
+
+static INLINE void
+vcache_triangle_flags( struct vcache_frontend *vcache,
+ ushort flags,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2 )
+{
+ vcache_elt(vcache, i0, flags);
+ vcache_elt(vcache, i1, 0);
+ vcache_elt(vcache, i2, 0);
+ vcache_check_flush(vcache);
+}
+
+static INLINE void
+vcache_line( struct vcache_frontend *vcache,
+ unsigned i0,
+ unsigned i1 )
+{
+ vcache_elt(vcache, i0, 0);
+ vcache_elt(vcache, i1, 0);
+ vcache_check_flush(vcache);
+}
+
+
+static INLINE void
+vcache_line_flags( struct vcache_frontend *vcache,
+ ushort flags,
+ unsigned i0,
+ unsigned i1 )
+{
+ vcache_elt(vcache, i0, flags);
+ vcache_elt(vcache, i1, 0);
+ vcache_check_flush(vcache);
+}
+
+
+static INLINE void
+vcache_point( struct vcache_frontend *vcache,
+ unsigned i0 )
+{
+ vcache_elt(vcache, i0, 0);
+ vcache_check_flush(vcache);
+}
+
+static INLINE void
+vcache_quad( struct vcache_frontend *vcache,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2,
+ unsigned i3 )
+{
+ vcache_triangle( vcache, i0, i1, i3 );
+ vcache_triangle( vcache, i1, i2, i3 );
+}
+
+static INLINE void
+vcache_ef_quad( struct vcache_frontend *vcache,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2,
+ unsigned i3 )
+{
+ if (vcache->draw->rasterizer->flatshade_first) {
+ vcache_triangle_flags( vcache,
+ ( DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1 ),
+ i0, i1, i2 );
+
+ vcache_triangle_flags( vcache,
+ ( DRAW_PIPE_EDGE_FLAG_2 |
+ DRAW_PIPE_EDGE_FLAG_1 ),
+ i0, i2, i3 );
+ }
+ else {
+ vcache_triangle_flags( vcache,
+ ( DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_2 ),
+ i0, i1, i3 );
+
+ vcache_triangle_flags( vcache,
+ ( DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1 ),
+ i1, i2, i3 );
+ }
+}
+
+/* At least for now, we're back to using a template include file for
+ * this. The two paths aren't too different though - it may be
+ * possible to reunify them.
+ */
+#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2)
+#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3)
+#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1)
+#define POINT(vc,i0) vcache_point(vc,i0)
+#define FUNC vcache_run_extras
+#include "draw_pt_vcache_tmp.h"
+
+#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2)
+#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3)
+#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1)
+#define POINT(vc,i0) vcache_point(vc,i0)
+#define FUNC vcache_run
+#include "draw_pt_vcache_tmp.h"
+
+static INLINE void
+rebase_uint_elts( const unsigned *src,
+ unsigned count,
+ int delta,
+ ushort *dest )
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ dest[i] = (ushort)(src[i] + delta);
+}
+
+static INLINE void
+rebase_ushort_elts( const ushort *src,
+ unsigned count,
+ int delta,
+ ushort *dest )
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ dest[i] = (ushort)(src[i] + delta);
+}
+
+static INLINE void
+rebase_ubyte_elts( const ubyte *src,
+ unsigned count,
+ int delta,
+ ushort *dest )
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ dest[i] = (ushort)(src[i] + delta);
+}
+
+
+
+static INLINE void
+translate_uint_elts( const unsigned *src,
+ unsigned count,
+ ushort *dest )
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ dest[i] = (ushort)(src[i]);
+}
+
+static INLINE void
+translate_ushort_elts( const ushort *src,
+ unsigned count,
+ ushort *dest )
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ dest[i] = (ushort)(src[i]);
+}
+
+static INLINE void
+translate_ubyte_elts( const ubyte *src,
+ unsigned count,
+ ushort *dest )
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ dest[i] = (ushort)(src[i]);
+}
+
+
+
+
+#if 0
+static INLINE enum pipe_format
+format_from_get_elt( pt_elt_func get_elt )
+{
+ switch (draw->pt.user.eltSize) {
+ case 1: return PIPE_FORMAT_R8_UNORM;
+ case 2: return PIPE_FORMAT_R16_UNORM;
+ case 4: return PIPE_FORMAT_R32_UNORM;
+ default: return PIPE_FORMAT_NONE;
+ }
+}
+#endif
+
+static INLINE void
+vcache_check_run( struct draw_pt_front_end *frontend,
+ pt_elt_func get_elt,
+ const void *elts,
+ unsigned draw_count )
+{
+ struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
+ struct draw_context *draw = vcache->draw;
+ unsigned min_index = draw->pt.user.min_index;
+ unsigned max_index = draw->pt.user.max_index;
+ unsigned index_size = draw->pt.user.eltSize;
+ unsigned fetch_count = max_index + 1 - min_index;
+ const ushort *transformed_elts;
+ ushort *storage = NULL;
+ boolean ok = FALSE;
+
+
+ if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
+ vcache->fetch_max,
+ draw_count);
+
+ if (max_index == 0xffffffff ||
+ fetch_count > draw_count) {
+ if (0) debug_printf("fail\n");
+ goto fail;
+ }
+
+ if (vcache->middle_prim != vcache->input_prim) {
+ vcache->middle_prim = vcache->input_prim;
+ vcache->middle->prepare( vcache->middle,
+ vcache->middle_prim,
+ vcache->opt,
+ &vcache->fetch_max );
+ }
+
+
+ if (min_index == 0 &&
+ index_size == 2)
+ {
+ transformed_elts = (const ushort *)elts;
+ }
+ else
+ {
+ storage = MALLOC( draw_count * sizeof(ushort) );
+ if (!storage)
+ goto fail;
+
+ if (min_index == 0) {
+ switch(index_size) {
+ case 1:
+ translate_ubyte_elts( (const ubyte *)elts,
+ draw_count,
+ storage );
+ break;
+
+ case 2:
+ translate_ushort_elts( (const ushort *)elts,
+ draw_count,
+ storage );
+ break;
+
+ case 4:
+ translate_uint_elts( (const uint *)elts,
+ draw_count,
+ storage );
+ break;
+
+ default:
+ assert(0);
+ return;
+ }
+ }
+ else {
+ switch(index_size) {
+ case 1:
+ rebase_ubyte_elts( (const ubyte *)elts,
+ draw_count,
+ 0 - (int)min_index,
+ storage );
+ break;
+
+ case 2:
+ rebase_ushort_elts( (const ushort *)elts,
+ draw_count,
+ 0 - (int)min_index,
+ storage );
+ break;
+
+ case 4:
+ rebase_uint_elts( (const uint *)elts,
+ draw_count,
+ 0 - (int)min_index,
+ storage );
+ break;
+
+ default:
+ assert(0);
+ return;
+ }
+ }
+ transformed_elts = storage;
+ }
+
+ if (fetch_count < UNDEFINED_VERTEX_ID)
+ ok = vcache->middle->run_linear_elts( vcache->middle,
+ min_index, /* start */
+ fetch_count,
+ transformed_elts,
+ draw_count );
+
+ FREE(storage);
+
+ if (ok)
+ return;
+
+ debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
+ fetch_count, draw_count);
+
+ fail:
+ vcache_run( frontend, get_elt, elts, draw_count );
+}
+
+
+
+
+static void
+vcache_prepare( struct draw_pt_front_end *frontend,
+ unsigned prim,
+ struct draw_pt_middle_end *middle,
+ unsigned opt )
+{
+ struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
+
+ if (opt & PT_PIPELINE)
+ {
+ vcache->base.run = vcache_run_extras;
+ }
+ else
+ {
+ vcache->base.run = vcache_check_run;
+ }
+
+ vcache->input_prim = prim;
+ vcache->output_prim = u_reduced_prim(prim);
+
+ vcache->middle = middle;
+ vcache->opt = opt;
+
+ /* Have to run prepare here, but try and guess a good prim for
+ * doing so:
+ */
+ vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim;
+ middle->prepare( middle, vcache->middle_prim, opt, &vcache->fetch_max );
+}
+
+
+
+
+static void
+vcache_finish( struct draw_pt_front_end *frontend )
+{
+ struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
+ vcache->middle->finish( vcache->middle );
+ vcache->middle = NULL;
+}
+
+static void
+vcache_destroy( struct draw_pt_front_end *frontend )
+{
+ FREE(frontend);
+}
+
+
+struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
+{
+ struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
+ if (vcache == NULL)
+ return NULL;
+
+ vcache->base.prepare = vcache_prepare;
+ vcache->base.run = NULL;
+ vcache->base.finish = vcache_finish;
+ vcache->base.destroy = vcache_destroy;
+ vcache->draw = draw;
+
+ memset(vcache->in, ~0, sizeof(vcache->in));
+
+ return &vcache->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
new file mode 100644
index 0000000000..62822a3d56
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -0,0 +1,214 @@
+
+
+static void FUNC( struct draw_pt_front_end *frontend,
+ pt_elt_func get_elt,
+ const void *elts,
+ unsigned count )
+{
+ struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
+ struct draw_context *draw = vcache->draw;
+
+ boolean flatfirst = (draw->rasterizer->flatshade &&
+ draw->rasterizer->flatshade_first);
+ unsigned i;
+ ushort flags;
+
+ if (0) debug_printf("%s %d\n", __FUNCTION__, count);
+
+
+ switch (vcache->input_prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i ++) {
+ POINT( vcache,
+ get_elt(elts, i + 0) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 0; i+1 < count; i += 2) {
+ LINE( vcache,
+ DRAW_PIPE_RESET_STIPPLE,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1));
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ if (count >= 2) {
+ flags = DRAW_PIPE_RESET_STIPPLE;
+
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE( vcache,
+ flags,
+ get_elt(elts, i - 1),
+ get_elt(elts, i ));
+ }
+
+ LINE( vcache,
+ flags,
+ get_elt(elts, i - 1),
+ get_elt(elts, 0 ));
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE( vcache,
+ flags,
+ get_elt(elts, i - 1),
+ get_elt(elts, i ));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 0; i+2 < count; i += 3) {
+ TRIANGLE( vcache,
+ DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2 ));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( vcache,
+ DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1 + (i&1)),
+ get_elt(elts, i + 2 - (i&1)));
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( vcache,
+ DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ get_elt(elts, i + 0 + (i&1)),
+ get_elt(elts, i + 1 - (i&1)),
+ get_elt(elts, i + 2 ));
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( vcache,
+ DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2),
+ get_elt(elts, 0 ));
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( vcache,
+ DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ get_elt(elts, 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2 ));
+ }
+ }
+ }
+ break;
+
+
+ case PIPE_PRIM_QUADS:
+ for (i = 0; i+3 < count; i += 4) {
+ if (flatfirst) {
+ QUAD( vcache,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2),
+ get_elt(elts, i + 3) );
+ }
+ else {
+ QUAD( vcache,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2),
+ get_elt(elts, i + 3) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ for (i = 0; i+3 < count; i += 2) {
+ if (flatfirst) {
+ QUAD( vcache,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 3),
+ get_elt(elts, i + 2) );
+ }
+ else {
+ QUAD( vcache,
+ get_elt(elts, i + 2),
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 3) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ {
+ /* These bitflags look a little odd because we submit the
+ * vertices as (1,2,0) to satisfy flatshade requirements.
+ */
+ const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
+ ushort edge_next, edge_finish;
+
+ if (flatfirst) {
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_middle | edge_last;
+ edge_next = edge_last;
+ edge_finish = edge_first;
+ }
+ else {
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+ edge_next = edge_middle;
+ edge_finish = edge_last;
+ }
+
+ for (i = 0; i+2 < count; i++, flags = edge_next) {
+
+ if (i + 3 == count)
+ flags |= edge_finish;
+
+ if (flatfirst) {
+ TRIANGLE( vcache,
+ flags,
+ get_elt(elts, 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2) );
+ }
+ else {
+ TRIANGLE( vcache,
+ flags,
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2),
+ get_elt(elts, 0));
+ }
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ vcache_flush( vcache );
+}
+
+
+#undef TRIANGLE
+#undef QUAD
+#undef POINT
+#undef LINE
+#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
new file mode 100644
index 0000000000..cccd3bf435
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -0,0 +1,129 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Vertex buffer drawing stage.
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonsec@tungstengraphics.com>
+ */
+
+#ifndef DRAW_VBUF_H_
+#define DRAW_VBUF_H_
+
+
+#include "pipe/p_compiler.h"
+
+
+struct pipe_rasterizer_state;
+struct draw_context;
+struct vertex_info;
+
+
+/**
+ * Interface for hardware vertex buffer rendering.
+ */
+struct vbuf_render {
+
+ /**
+ * Driver limits. May be tuned lower to improve cache hits on
+ * index list.
+ */
+ unsigned max_indices;
+ unsigned max_vertex_buffer_bytes;
+
+ /**
+ * Query if the hardware driver needs assistance for a particular
+ * combination of rasterizer state and primitive.
+ *
+ * Currently optional.
+ */
+ boolean (*need_pipeline)(const struct vbuf_render *render,
+ const struct pipe_rasterizer_state *rasterizer,
+ unsigned int prim );
+
+
+ /**
+ * Get the hardware vertex format.
+ *
+ * XXX: have this in draw_context instead?
+ */
+ const struct vertex_info *(*get_vertex_info)( struct vbuf_render * );
+
+ /**
+ * Request a destination for vertices.
+ * Hardware renderers will use ttm memory, others will just malloc
+ * something.
+ */
+ boolean (*allocate_vertices)( struct vbuf_render *,
+ ushort vertex_size,
+ ushort nr_vertices );
+
+ void *(*map_vertices)( struct vbuf_render * );
+ void (*unmap_vertices)( struct vbuf_render *,
+ ushort min_index,
+ ushort max_index );
+
+ /**
+ * Notify the renderer of the current primitive when it changes.
+ * Must succeed for TRIANGLES, LINES and POINTS. Other prims at
+ * the discretion of the driver, for the benefit of the passthrough
+ * path.
+ */
+ boolean (*set_primitive)( struct vbuf_render *, unsigned prim );
+
+ /**
+ * DrawElements, note indices are ushort. The driver must complete
+ * this call, if necessary splitting the index list itself.
+ */
+ void (*draw)( struct vbuf_render *,
+ const ushort *indices,
+ uint nr_indices );
+
+ /* Draw Arrays path too.
+ */
+ void (*draw_arrays)( struct vbuf_render *,
+ unsigned start,
+ uint nr );
+
+ /**
+ * Called when vbuf is done with this set of vertices:
+ */
+ void (*release_vertices)( struct vbuf_render * );
+
+ void (*destroy)( struct vbuf_render * );
+};
+
+
+
+struct draw_stage *
+draw_vbuf_stage( struct draw_context *draw,
+ struct vbuf_render *render );
+
+
+#endif /*DRAW_VBUF_H_*/
diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c
new file mode 100644
index 0000000000..3214213e44
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vertex.c
@@ -0,0 +1,129 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Functions for specifying the post-transformation vertex layout.
+ *
+ * Author:
+ * Brian Paul
+ * Keith Whitwell
+ */
+
+
+#include "draw/draw_private.h"
+#include "draw/draw_vertex.h"
+
+
+/**
+ * Compute the size of a vertex, in dwords/floats, to update the
+ * vinfo->size field.
+ */
+void
+draw_compute_vertex_size(struct vertex_info *vinfo)
+{
+ uint i;
+
+ vinfo->size = 0;
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ switch (vinfo->attrib[i].emit) {
+ case EMIT_OMIT:
+ break;
+ case EMIT_4UB:
+ /* fall-through */
+ case EMIT_1F_PSIZE:
+ /* fall-through */
+ case EMIT_1F:
+ vinfo->size += 1;
+ break;
+ case EMIT_2F:
+ vinfo->size += 2;
+ break;
+ case EMIT_3F:
+ vinfo->size += 3;
+ break;
+ case EMIT_4F:
+ vinfo->size += 4;
+ break;
+ default:
+ assert(0);
+ }
+ }
+}
+
+
+void
+draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
+{
+ unsigned i, j;
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ j = vinfo->attrib[i].src_index;
+ switch (vinfo->attrib[i].emit) {
+ case EMIT_OMIT:
+ debug_printf("EMIT_OMIT:");
+ break;
+ case EMIT_1F:
+ debug_printf("EMIT_1F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ debug_printf("EMIT_1F_PSIZE:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_2F:
+ debug_printf("EMIT_2F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_3F:
+ debug_printf("EMIT_3F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ data += sizeof(float);
+ break;
+ case EMIT_4F:
+ debug_printf("EMIT_4F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_4UB:
+ debug_printf("EMIT_4UB:\t");
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ break;
+ default:
+ assert(0);
+ }
+ debug_printf("\n");
+ }
+ debug_printf("\n");
+}
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
new file mode 100644
index 0000000000..554f4ac3c1
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -0,0 +1,162 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Post-transform vertex format info. The vertex_info struct is used by
+ * the draw_vbuf code to emit hardware-specific vertex layouts into hw
+ * vertex buffers.
+ *
+ * Author:
+ * Brian Paul
+ */
+
+
+#ifndef DRAW_VERTEX_H
+#define DRAW_VERTEX_H
+
+
+#include "pipe/p_state.h"
+
+
+/**
+ * Vertex attribute emit modes
+ */
+enum attrib_emit {
+ EMIT_OMIT, /**< don't emit the attribute */
+ EMIT_1F,
+ EMIT_1F_PSIZE, /**< insert constant point size */
+ EMIT_2F,
+ EMIT_3F,
+ EMIT_4F,
+ EMIT_4UB /**< XXX may need variations for RGBA vs BGRA, etc */
+};
+
+
+/**
+ * Attribute interpolation mode
+ */
+enum interp_mode {
+ INTERP_NONE, /**< never interpolate vertex header info */
+ INTERP_POS, /**< special case for frag position */
+ INTERP_CONSTANT,
+ INTERP_LINEAR,
+ INTERP_PERSPECTIVE
+};
+
+
+/**
+ * Information about hardware/rasterization vertex layout.
+ */
+struct vertex_info
+{
+ uint num_attribs;
+ uint hwfmt[4]; /**< hardware format info for this format */
+ uint size; /**< total vertex size in dwords */
+
+ /* Keep this small and at the end of the struct to allow quick
+ * memcmp() comparisons.
+ */
+ struct {
+ unsigned interp_mode:4; /**< INTERP_x */
+ unsigned emit:4; /**< EMIT_x */
+ unsigned src_index:8; /**< map to post-xform attribs */
+ } attrib[PIPE_MAX_SHADER_INPUTS];
+};
+
+static INLINE size_t
+draw_vinfo_size( const struct vertex_info *a )
+{
+ return offsetof(const struct vertex_info, attrib[a->num_attribs]);
+}
+
+static INLINE int
+draw_vinfo_compare( const struct vertex_info *a,
+ const struct vertex_info *b )
+{
+ size_t sizea = draw_vinfo_size( a );
+ return memcmp( a, b, sizea );
+}
+
+static INLINE void
+draw_vinfo_copy( struct vertex_info *dst,
+ const struct vertex_info *src )
+{
+ size_t size = draw_vinfo_size( src );
+ memcpy( dst, src, size );
+}
+
+
+
+/**
+ * Add another attribute to the given vertex_info object.
+ * \param src_index indicates which post-transformed vertex attrib slot
+ * corresponds to this attribute.
+ * \return slot in which the attribute was added
+ */
+static INLINE uint
+draw_emit_vertex_attr(struct vertex_info *vinfo,
+ enum attrib_emit emit,
+ enum interp_mode interp, /* only used by softpipe??? */
+ uint src_index)
+{
+ const uint n = vinfo->num_attribs;
+ assert(n < PIPE_MAX_SHADER_INPUTS);
+ vinfo->attrib[n].emit = emit;
+ vinfo->attrib[n].interp_mode = interp;
+ vinfo->attrib[n].src_index = src_index;
+ vinfo->num_attribs++;
+ return n;
+}
+
+
+extern void draw_compute_vertex_size(struct vertex_info *vinfo);
+
+void draw_dump_emitted_vertex(const struct vertex_info *vinfo,
+ const uint8_t *data);
+
+
+static INLINE unsigned draw_translate_vinfo_format(unsigned format )
+{
+ switch (format) {
+ case EMIT_1F:
+ case EMIT_1F_PSIZE:
+ return PIPE_FORMAT_R32_FLOAT;
+ case EMIT_2F:
+ return PIPE_FORMAT_R32G32_FLOAT;
+ case EMIT_3F:
+ return PIPE_FORMAT_R32G32B32_FLOAT;
+ case EMIT_4F:
+ return PIPE_FORMAT_R32G32B32A32_FLOAT;
+ case EMIT_4UB:
+ return PIPE_FORMAT_R8G8B8A8_UNORM;
+ default:
+ return PIPE_FORMAT_NONE;
+ }
+}
+
+
+#endif /* DRAW_VERTEX_H */
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
new file mode 100644
index 0000000000..790e89ed82
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -0,0 +1,254 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+#include "draw_vs.h"
+
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+#include "tgsi/tgsi_exec.h"
+
+
+
+
+void draw_vs_set_constants( struct draw_context *draw,
+ const float (*constants)[4],
+ unsigned size )
+{
+ if (((uintptr_t)constants) & 0xf) {
+ if (size > draw->vs.const_storage_size) {
+ if (draw->vs.aligned_constant_storage)
+ align_free((void *)draw->vs.aligned_constant_storage);
+ draw->vs.aligned_constant_storage = align_malloc( size, 16 );
+ }
+ memcpy( (void*)draw->vs.aligned_constant_storage,
+ constants,
+ size );
+ constants = draw->vs.aligned_constant_storage;
+ }
+
+ draw->vs.aligned_constants = constants;
+ draw_vs_aos_machine_constants( draw->vs.aos_machine, constants );
+}
+
+
+void draw_vs_set_viewport( struct draw_context *draw,
+ const struct pipe_viewport_state *viewport )
+{
+ draw_vs_aos_machine_viewport( draw->vs.aos_machine, viewport );
+}
+
+
+
+struct draw_vertex_shader *
+draw_create_vertex_shader(struct draw_context *draw,
+ const struct pipe_shader_state *shader)
+{
+ struct draw_vertex_shader *vs;
+
+ vs = draw_create_vs_llvm( draw, shader );
+ if (!vs) {
+ vs = draw_create_vs_sse( draw, shader );
+ if (!vs) {
+ vs = draw_create_vs_ppc( draw, shader );
+ if (!vs) {
+ vs = draw_create_vs_exec( draw, shader );
+ }
+ }
+ }
+
+ if (vs)
+ {
+ uint i;
+ for (i = 0; i < vs->info.num_outputs; i++) {
+ if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
+ vs->info.output_semantic_index[i] == 0)
+ vs->position_output = i;
+ }
+ }
+
+ assert(vs);
+ return vs;
+}
+
+
+void
+draw_bind_vertex_shader(struct draw_context *draw,
+ struct draw_vertex_shader *dvs)
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ if (dvs)
+ {
+ draw->vs.vertex_shader = dvs;
+ draw->vs.num_vs_outputs = dvs->info.num_outputs;
+ draw->vs.position_output = dvs->position_output;
+ dvs->prepare( dvs, draw );
+ }
+ else {
+ draw->vs.vertex_shader = NULL;
+ draw->vs.num_vs_outputs = 0;
+ }
+}
+
+
+void
+draw_delete_vertex_shader(struct draw_context *draw,
+ struct draw_vertex_shader *dvs)
+{
+ unsigned i;
+
+ for (i = 0; i < dvs->nr_varients; i++)
+ dvs->varient[i]->destroy( dvs->varient[i] );
+
+ dvs->nr_varients = 0;
+
+ dvs->delete( dvs );
+}
+
+
+
+boolean
+draw_vs_init( struct draw_context *draw )
+{
+ draw->vs.machine = tgsi_exec_machine_create();
+ if (!draw->vs.machine)
+ return FALSE;
+
+ draw->vs.emit_cache = translate_cache_create();
+ if (!draw->vs.emit_cache)
+ return FALSE;
+
+ draw->vs.fetch_cache = translate_cache_create();
+ if (!draw->vs.fetch_cache)
+ return FALSE;
+
+ draw->vs.aos_machine = draw_vs_aos_machine();
+#ifdef PIPE_ARCH_X86
+ if (!draw->vs.aos_machine)
+ return FALSE;
+#endif
+
+ return TRUE;
+}
+
+void
+draw_vs_destroy( struct draw_context *draw )
+{
+ if (draw->vs.fetch_cache)
+ translate_cache_destroy(draw->vs.fetch_cache);
+
+ if (draw->vs.emit_cache)
+ translate_cache_destroy(draw->vs.emit_cache);
+
+ if (draw->vs.aos_machine)
+ draw_vs_aos_machine_destroy(draw->vs.aos_machine);
+
+ if (draw->vs.aligned_constant_storage)
+ align_free((void*)draw->vs.aligned_constant_storage);
+
+ tgsi_exec_machine_destroy(draw->vs.machine);
+}
+
+
+struct draw_vs_varient *
+draw_vs_lookup_varient( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key )
+{
+ struct draw_vs_varient *varient;
+ unsigned i;
+
+ /* Lookup existing varient:
+ */
+ for (i = 0; i < vs->nr_varients; i++)
+ if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0)
+ return vs->varient[i];
+
+ /* Else have to create a new one:
+ */
+ varient = vs->create_varient( vs, key );
+ if (varient == NULL)
+ return NULL;
+
+ /* Add it to our list, could be smarter:
+ */
+ if (vs->nr_varients < Elements(vs->varient)) {
+ vs->varient[vs->nr_varients++] = varient;
+ }
+ else {
+ vs->last_varient++;
+ vs->last_varient %= Elements(vs->varient);
+ vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]);
+ vs->varient[vs->last_varient] = varient;
+ }
+
+ /* Done
+ */
+ return varient;
+}
+
+
+struct translate *
+draw_vs_get_fetch( struct draw_context *draw,
+ struct translate_key *key )
+{
+ if (!draw->vs.fetch ||
+ translate_key_compare(&draw->vs.fetch->key, key) != 0)
+ {
+ translate_key_sanitize(key);
+ draw->vs.fetch = translate_cache_find(draw->vs.fetch_cache, key);
+ }
+
+ return draw->vs.fetch;
+}
+
+struct translate *
+draw_vs_get_emit( struct draw_context *draw,
+ struct translate_key *key )
+{
+ if (!draw->vs.emit ||
+ translate_key_compare(&draw->vs.emit->key, key) != 0)
+ {
+ translate_key_sanitize(key);
+ draw->vs.emit = translate_cache_find(draw->vs.emit_cache, key);
+ }
+
+ return draw->vs.emit;
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
new file mode 100644
index 0000000000..89ae158751
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -0,0 +1,224 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef DRAW_VS_H
+#define DRAW_VS_H
+
+#include "draw_context.h"
+#include "draw_private.h"
+
+
+struct draw_context;
+struct pipe_shader_state;
+
+struct draw_varient_input
+{
+ enum pipe_format format;
+ unsigned buffer;
+ unsigned offset;
+};
+
+struct draw_varient_output
+{
+ enum pipe_format format; /* output format */
+ unsigned vs_output:8; /* which vertex shader output is this? */
+ unsigned offset:24; /* offset into output vertex */
+};
+
+struct draw_varient_element {
+ struct draw_varient_input in;
+ struct draw_varient_output out;
+};
+
+struct draw_vs_varient_key {
+ unsigned output_stride;
+ unsigned nr_elements:8; /* max2(nr_inputs, nr_outputs) */
+ unsigned nr_inputs:8;
+ unsigned nr_outputs:8;
+ unsigned viewport:1;
+ unsigned clip:1;
+ unsigned const_vbuffers:5;
+ struct draw_varient_element element[PIPE_MAX_ATTRIBS];
+};
+
+struct draw_vs_varient;
+
+
+struct draw_vs_varient {
+ struct draw_vs_varient_key key;
+
+ struct draw_vertex_shader *vs;
+
+ void (*set_buffer)( struct draw_vs_varient *,
+ unsigned i,
+ const void *ptr,
+ unsigned stride );
+
+ void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
+ unsigned start,
+ unsigned count,
+ void *output_buffer );
+
+ void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer );
+
+ void (*destroy)( struct draw_vs_varient * );
+};
+
+
+/**
+ * Private version of the compiled vertex_shader
+ */
+struct draw_vertex_shader {
+ struct draw_context *draw;
+
+ /* This member will disappear shortly:
+ */
+ struct pipe_shader_state state;
+
+ struct tgsi_shader_info info;
+ unsigned position_output;
+
+ /* Extracted from shader:
+ */
+ const float (*immediates)[4];
+
+ /*
+ */
+ struct draw_vs_varient *varient[16];
+ unsigned nr_varients;
+ unsigned last_varient;
+ struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader,
+ const struct draw_vs_varient_key *key );
+
+
+ void (*prepare)( struct draw_vertex_shader *shader,
+ struct draw_context *draw );
+
+ /* Run the shader - this interface will get cleaned up in the
+ * future:
+ */
+ void (*run_linear)( struct draw_vertex_shader *shader,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride );
+
+
+ void (*delete)( struct draw_vertex_shader * );
+};
+
+
+struct draw_vs_varient *
+draw_vs_lookup_varient( struct draw_vertex_shader *base,
+ const struct draw_vs_varient_key *key );
+
+
+/********************************************************************************
+ * Internal functions:
+ */
+
+struct draw_vertex_shader *
+draw_create_vs_exec(struct draw_context *draw,
+ const struct pipe_shader_state *templ);
+
+struct draw_vertex_shader *
+draw_create_vs_sse(struct draw_context *draw,
+ const struct pipe_shader_state *templ);
+
+struct draw_vertex_shader *
+draw_create_vs_ppc(struct draw_context *draw,
+ const struct pipe_shader_state *templ);
+
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *templ);
+
+
+
+struct draw_vs_varient_key;
+struct draw_vertex_shader;
+
+struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
+
+
+
+/********************************************************************************
+ * Helpers for vs implementations that don't do their own fetch/emit varients.
+ * Means these can be shared between shaders.
+ */
+struct translate;
+struct translate_key;
+
+struct translate *draw_vs_get_fetch( struct draw_context *draw,
+ struct translate_key *key );
+
+
+struct translate *draw_vs_get_emit( struct draw_context *draw,
+ struct translate_key *key );
+
+struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
+
+
+
+static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key )
+{
+ return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_varient_element);
+}
+
+static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a,
+ const struct draw_vs_varient_key *b )
+{
+ int keysize = draw_vs_varient_keysize(a);
+ return memcmp(a, b, keysize);
+}
+
+
+struct aos_machine *draw_vs_aos_machine( void );
+void draw_vs_aos_machine_destroy( struct aos_machine *machine );
+
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+ const float (*constants)[4] );
+
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+ const struct pipe_viewport_state *viewport );
+
+
+#define MAX_TGSI_VERTICES 4
+
+
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
new file mode 100644
index 0000000000..62e04a65f3
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -0,0 +1,2274 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 6.3
+ *
+ * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Translate tgsi vertex programs to x86/x87/SSE/SSE2 machine code
+ * using the rtasm runtime assembler. Based on the old
+ * t_vb_arb_program_sse.c
+ */
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+#ifdef PIPE_ARCH_X86
+#define DISASSEM 0
+#define FAST_MATH 1
+
+static const char *files[] =
+{
+ "NULL",
+ "CONST",
+ "IN",
+ "OUT",
+ "TEMP",
+ "SAMP",
+ "ADDR",
+ "IMM",
+ "INTERNAL",
+};
+
+static INLINE boolean eq( struct x86_reg a,
+ struct x86_reg b )
+{
+ return (a.file == b.file &&
+ a.idx == b.idx &&
+ a.mod == b.mod &&
+ a.disp == b.disp);
+}
+
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+ unsigned which_reg, /* quick hack */
+ unsigned value )
+{
+ struct x86_reg reg;
+
+ if (which_reg == 0)
+ reg = cp->temp_EBP;
+ else
+ reg = cp->tmp_EAX;
+
+ if (cp->x86_reg[which_reg] != value) {
+ unsigned offset;
+
+ switch (value) {
+ case X86_IMMEDIATES:
+ assert(which_reg == 0);
+ offset = Offset(struct aos_machine, immediates);
+ break;
+ case X86_CONSTANTS:
+ assert(which_reg == 1);
+ offset = Offset(struct aos_machine, constants);
+ break;
+ case X86_BUFFERS:
+ assert(which_reg == 0);
+ offset = Offset(struct aos_machine, buffer);
+ break;
+ default:
+ assert(0);
+ offset = 0;
+ }
+
+
+ x86_mov(cp->func, reg,
+ x86_make_disp(cp->machine_EDX, offset));
+
+ cp->x86_reg[which_reg] = value;
+ }
+
+ return reg;
+}
+
+
+static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx )
+{
+ struct x86_reg ptr = cp->machine_EDX;
+
+ switch (file) {
+ case TGSI_FILE_INPUT:
+ assert(idx < MAX_INPUTS);
+ return x86_make_disp(ptr, Offset(struct aos_machine, input[idx]));
+
+ case TGSI_FILE_OUTPUT:
+ return x86_make_disp(ptr, Offset(struct aos_machine, output[idx]));
+
+ case TGSI_FILE_TEMPORARY:
+ assert(idx < MAX_TEMPS);
+ return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));
+
+ case AOS_FILE_INTERNAL:
+ assert(idx < MAX_INTERNALS);
+ return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));
+
+ case TGSI_FILE_IMMEDIATE:
+ assert(idx < MAX_IMMEDIATES); /* just a sanity check */
+ return x86_make_disp(aos_get_x86(cp, 0, X86_IMMEDIATES), idx * 4 * sizeof(float));
+
+ case TGSI_FILE_CONSTANT:
+ assert(idx < MAX_CONSTANTS); /* just a sanity check */
+ return x86_make_disp(aos_get_x86(cp, 1, X86_CONSTANTS), idx * 4 * sizeof(float));
+
+ default:
+ AOS_ERROR(cp, "unknown reg file");
+ return x86_make_reg(0,0);
+ }
+}
+
+
+
+#define X87_CW_EXCEPTION_INV_OP (1<<0)
+#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
+#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
+#define X87_CW_EXCEPTION_OVERFLOW (1<<3)
+#define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
+#define X87_CW_EXCEPTION_PRECISION (1<<5)
+#define X87_CW_PRECISION_SINGLE (0<<8)
+#define X87_CW_PRECISION_RESERVED (1<<8)
+#define X87_CW_PRECISION_DOUBLE (2<<8)
+#define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
+#define X87_CW_PRECISION_MASK (3<<8)
+#define X87_CW_ROUND_NEAREST (0<<10)
+#define X87_CW_ROUND_DOWN (1<<10)
+#define X87_CW_ROUND_UP (2<<10)
+#define X87_CW_ROUND_ZERO (3<<10)
+#define X87_CW_ROUND_MASK (3<<10)
+#define X87_CW_INFINITY (1<<12)
+
+
+
+
+static void spill( struct aos_compilation *cp, unsigned idx )
+{
+ if (!cp->xmm[idx].dirty ||
+ (cp->xmm[idx].file != TGSI_FILE_INPUT && /* inputs are fetched into xmm & set dirty */
+ cp->xmm[idx].file != TGSI_FILE_OUTPUT &&
+ cp->xmm[idx].file != TGSI_FILE_TEMPORARY)) {
+ AOS_ERROR(cp, "invalid spill");
+ return;
+ }
+ else {
+ struct x86_reg oldval = get_reg_ptr(cp,
+ cp->xmm[idx].file,
+ cp->xmm[idx].idx);
+
+ if (0) debug_printf("\nspill %s[%d]",
+ files[cp->xmm[idx].file],
+ cp->xmm[idx].idx);
+
+ assert(cp->xmm[idx].dirty);
+ sse_movaps(cp->func, oldval, x86_make_reg(file_XMM, idx));
+ cp->xmm[idx].dirty = 0;
+ }
+}
+
+
+void aos_spill_all( struct aos_compilation *cp )
+{
+ unsigned i;
+
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+}
+
+
+static struct x86_reg get_xmm_writable( struct aos_compilation *cp,
+ struct x86_reg reg )
+{
+ if (reg.file != file_XMM ||
+ cp->xmm[reg.idx].file != TGSI_FILE_NULL)
+ {
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ sse_movaps(cp->func, tmp, reg);
+ reg = tmp;
+ }
+
+ cp->xmm[reg.idx].last_used = cp->insn_counter;
+ return reg;
+}
+
+static struct x86_reg get_xmm( struct aos_compilation *cp,
+ struct x86_reg reg )
+{
+ if (reg.file != file_XMM)
+ {
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ sse_movaps(cp->func, tmp, reg);
+ reg = tmp;
+ }
+
+ cp->xmm[reg.idx].last_used = cp->insn_counter;
+ return reg;
+}
+
+
+/* Allocate an empty xmm register, either as a temporary or later to
+ * "adopt" as a shader reg.
+ */
+struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp )
+{
+ unsigned i;
+ unsigned oldest = 0;
+ boolean found = FALSE;
+
+ for (i = 0; i < 8; i++)
+ if (cp->xmm[i].last_used != cp->insn_counter &&
+ cp->xmm[i].file == TGSI_FILE_NULL) {
+ oldest = i;
+ found = TRUE;
+ }
+
+ if (!found) {
+ for (i = 0; i < 8; i++)
+ if (cp->xmm[i].last_used < cp->xmm[oldest].last_used)
+ oldest = i;
+ }
+
+ /* Need to write out the old value?
+ */
+ if (cp->xmm[oldest].dirty)
+ spill(cp, oldest);
+
+ assert(cp->xmm[oldest].last_used != cp->insn_counter);
+
+ cp->xmm[oldest].file = TGSI_FILE_NULL;
+ cp->xmm[oldest].idx = 0;
+ cp->xmm[oldest].dirty = 0;
+ cp->xmm[oldest].last_used = cp->insn_counter;
+ return x86_make_reg(file_XMM, oldest);
+}
+
+void aos_release_xmm_reg( struct aos_compilation *cp,
+ unsigned idx )
+{
+ cp->xmm[idx].file = TGSI_FILE_NULL;
+ cp->xmm[idx].idx = 0;
+ cp->xmm[idx].dirty = 0;
+ cp->xmm[idx].last_used = 0;
+}
+
+
+static void aos_soft_release_xmm( struct aos_compilation *cp,
+ struct x86_reg reg )
+{
+ if (reg.file == file_XMM) {
+ assert(cp->xmm[reg.idx].last_used == cp->insn_counter);
+ cp->xmm[reg.idx].last_used = cp->insn_counter - 1;
+ }
+}
+
+
+
+/* Mark an xmm reg as holding the current copy of a shader reg.
+ */
+void aos_adopt_xmm_reg( struct aos_compilation *cp,
+ struct x86_reg reg,
+ unsigned file,
+ unsigned idx,
+ unsigned dirty )
+{
+ unsigned i;
+
+ if (reg.file != file_XMM) {
+ assert(0);
+ return;
+ }
+
+
+ /* If any xmm reg thinks it holds this shader reg, break the
+ * illusion.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].file == file &&
+ cp->xmm[i].idx == idx)
+ {
+ /* If an xmm reg is already holding this shader reg, take into account its
+ * dirty flag...
+ */
+ dirty |= cp->xmm[i].dirty;
+ aos_release_xmm_reg(cp, i);
+ }
+ }
+
+ cp->xmm[reg.idx].file = file;
+ cp->xmm[reg.idx].idx = idx;
+ cp->xmm[reg.idx].dirty = dirty;
+ cp->xmm[reg.idx].last_used = cp->insn_counter;
+}
+
+
+/* Return a pointer to the in-memory copy of the reg, making sure it is uptodate.
+ */
+static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx )
+{
+ unsigned i;
+
+ /* Ensure the in-memory copy of this reg is up-to-date
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].file == file &&
+ cp->xmm[i].idx == idx &&
+ cp->xmm[i].dirty) {
+ spill(cp, i);
+ }
+ }
+
+ return get_reg_ptr( cp, file, idx );
+}
+
+
+/* As above, but return a pointer. Note - this pointer may alias
+ * those returned by get_arg_ptr().
+ */
+static struct x86_reg get_dst_ptr( struct aos_compilation *cp,
+ const struct tgsi_full_dst_register *dst )
+{
+ unsigned file = dst->DstRegister.File;
+ unsigned idx = dst->DstRegister.Index;
+ unsigned i;
+
+
+ /* Ensure in-memory copy of this reg is up-to-date and invalidate
+ * any xmm copies.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].file == file &&
+ cp->xmm[i].idx == idx)
+ {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+
+ aos_release_xmm_reg(cp, i);
+ }
+ }
+
+ return get_reg_ptr( cp, file, idx );
+}
+
+
+
+
+
+/* Return an XMM reg if the argument is resident, otherwise return a
+ * base+offset pointer to the saved value.
+ */
+struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx )
+{
+ unsigned i;
+
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].file == file &&
+ cp->xmm[i].idx == idx)
+ {
+ cp->xmm[i].last_used = cp->insn_counter;
+ return x86_make_reg(file_XMM, i);
+ }
+ }
+
+ /* If not found in the XMM register file, return an indirect
+ * reference to the in-memory copy:
+ */
+ return get_reg_ptr( cp, file, idx );
+}
+
+
+
+static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx )
+{
+ struct x86_reg reg = get_xmm( cp,
+ aos_get_shader_reg( cp, file, idx ) );
+
+ aos_adopt_xmm_reg( cp,
+ reg,
+ file,
+ idx,
+ FALSE );
+
+ return reg;
+}
+
+
+
+struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
+ unsigned imm )
+{
+ return aos_get_shader_reg_xmm( cp, AOS_FILE_INTERNAL, imm );
+}
+
+
+struct x86_reg aos_get_internal( struct aos_compilation *cp,
+ unsigned imm )
+{
+ return aos_get_shader_reg( cp, AOS_FILE_INTERNAL, imm );
+}
+
+
+
+
+
+/* Emulate pshufd insn in regular SSE, if necessary:
+ */
+static void emit_pshufd( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg arg0,
+ ubyte shuf )
+{
+ if (cp->have_sse2) {
+ sse2_pshufd(cp->func, dst, arg0, shuf);
+ }
+ else {
+ if (!eq(dst, arg0))
+ sse_movaps(cp->func, dst, arg0);
+
+ sse_shufps(cp->func, dst, dst, shuf);
+ }
+}
+
+/* load masks (pack into negs??)
+ * pshufd - shuffle according to writemask
+ * and - result, mask
+ * nand - dest, mask
+ * or - dest, result
+ */
+static boolean mask_write( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg result,
+ unsigned mask )
+{
+ struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ emit_pshufd(cp, tmp, imm_swz,
+ SHUF((mask & 1) ? 2 : 3,
+ (mask & 2) ? 2 : 3,
+ (mask & 4) ? 2 : 3,
+ (mask & 8) ? 2 : 3));
+
+ sse_andps(cp->func, dst, tmp);
+ sse_andnps(cp->func, tmp, result);
+ sse_orps(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ return TRUE;
+}
+
+
+
+
+/* Helper for writemask:
+ */
+static boolean emit_shuf_copy2( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg arg0,
+ struct x86_reg arg1,
+ ubyte shuf )
+{
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ emit_pshufd(cp, dst, arg1, shuf);
+ emit_pshufd(cp, tmp, arg0, shuf);
+ sse_shufps(cp->func, dst, tmp, SHUF(X, Y, Z, W));
+ emit_pshufd(cp, dst, dst, shuf);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ return TRUE;
+}
+
+
+
+#define SSE_SWIZZLE_NOOP ((0<<0) | (1<<2) | (2<<4) | (3<<6))
+
+
+/* Locate a source register and perform any required (simple) swizzle.
+ *
+ * Just fail on complex swizzles at this point.
+ */
+static struct x86_reg fetch_src( struct aos_compilation *cp,
+ const struct tgsi_full_src_register *src )
+{
+ struct x86_reg arg0 = aos_get_shader_reg(cp,
+ src->SrcRegister.File,
+ src->SrcRegister.Index);
+ unsigned i;
+ ubyte swz = 0;
+ unsigned negs = 0;
+ unsigned abs = 0;
+
+ for (i = 0; i < 4; i++) {
+ unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, i );
+ unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, i );
+
+ switch (swizzle) {
+ case TGSI_EXTSWIZZLE_ZERO:
+ case TGSI_EXTSWIZZLE_ONE:
+ AOS_ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2");
+ break;
+
+ default:
+ swz |= (swizzle & 0x3) << (i * 2);
+ break;
+ }
+
+ switch (neg) {
+ case TGSI_UTIL_SIGN_TOGGLE:
+ negs |= (1<<i);
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+
+ case TGSI_UTIL_SIGN_CLEAR:
+ abs |= (1<<i);
+ break;
+
+ default:
+ AOS_ERROR(cp, "unsupported sign-mode");
+ break;
+ }
+ }
+
+ if (swz != SSE_SWIZZLE_NOOP || negs != 0 || abs != 0) {
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+
+ if (swz != SSE_SWIZZLE_NOOP)
+ emit_pshufd(cp, dst, arg0, swz);
+ else
+ sse_movaps(cp->func, dst, arg0);
+
+ if (negs && negs != 0xf) {
+ struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ /* Load 1,-1,0,0
+ * Use neg as arg to pshufd
+ * Multiply
+ */
+ emit_pshufd(cp, tmp, imm_swz,
+ SHUF((negs & 1) ? 1 : 0,
+ (negs & 2) ? 1 : 0,
+ (negs & 4) ? 1 : 0,
+ (negs & 8) ? 1 : 0));
+ sse_mulps(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ aos_soft_release_xmm(cp, imm_swz);
+ }
+ else if (negs) {
+ struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS);
+ sse_mulps(cp->func, dst, imm_negs);
+ aos_soft_release_xmm(cp, imm_negs);
+ }
+
+
+ if (abs && abs != 0xf) {
+ AOS_ERROR(cp, "unsupported partial abs");
+ }
+ else if (abs) {
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movaps(cp->func, tmp, dst);
+ sse_mulps(cp->func, tmp, neg);
+ sse_maxps(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ aos_soft_release_xmm(cp, neg);
+ }
+
+ aos_soft_release_xmm(cp, arg0);
+ return dst;
+ }
+
+ return arg0;
+}
+
+static void x87_fld_src( struct aos_compilation *cp,
+ const struct tgsi_full_src_register *src,
+ unsigned channel )
+{
+ struct x86_reg arg0 = aos_get_shader_reg_ptr(cp,
+ src->SrcRegister.File,
+ src->SrcRegister.Index);
+
+ unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, channel );
+ unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel );
+
+ switch (swizzle) {
+ case TGSI_EXTSWIZZLE_ZERO:
+ x87_fldz( cp->func );
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ x87_fld1( cp->func );
+ break;
+
+ default:
+ x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) );
+ break;
+ }
+
+
+ switch (neg) {
+ case TGSI_UTIL_SIGN_TOGGLE:
+ /* Flip the sign:
+ */
+ x87_fchs( cp->func );
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+
+ case TGSI_UTIL_SIGN_CLEAR:
+ x87_fabs( cp->func );
+ break;
+
+ case TGSI_UTIL_SIGN_SET:
+ x87_fabs( cp->func );
+ x87_fchs( cp->func );
+ break;
+
+ default:
+ AOS_ERROR(cp, "unsupported sign-mode");
+ break;
+ }
+}
+
+
+
+
+
+
+/* Used to implement write masking. This and most of the other instructions
+ * here would be easier to implement if there had been a translation
+ * to a 2 argument format (dst/arg0, arg1) at the shader level before
+ * attempting to translate to x86/sse code.
+ */
+static void store_dest( struct aos_compilation *cp,
+ const struct tgsi_full_dst_register *reg,
+ struct x86_reg result )
+{
+ struct x86_reg dst;
+
+ switch (reg->DstRegister.WriteMask) {
+ case 0:
+ return;
+
+ case TGSI_WRITEMASK_XYZW:
+ aos_adopt_xmm_reg(cp,
+ get_xmm_writable(cp, result),
+ reg->DstRegister.File,
+ reg->DstRegister.Index,
+ TRUE);
+ return;
+ default:
+ break;
+ }
+
+ dst = aos_get_shader_reg_xmm(cp,
+ reg->DstRegister.File,
+ reg->DstRegister.Index);
+
+ switch (reg->DstRegister.WriteMask) {
+ case TGSI_WRITEMASK_X:
+ sse_movss(cp->func, dst, get_xmm(cp, result));
+ break;
+
+ case TGSI_WRITEMASK_ZW:
+ sse_shufps(cp->func, dst, get_xmm(cp, result), SHUF(X, Y, Z, W));
+ break;
+
+ case TGSI_WRITEMASK_XY:
+ result = get_xmm_writable(cp, result);
+ sse_shufps(cp->func, result, dst, SHUF(X, Y, Z, W));
+ dst = result;
+ break;
+
+ case TGSI_WRITEMASK_YZW:
+ result = get_xmm_writable(cp, result);
+ sse_movss(cp->func, result, dst);
+ dst = result;
+ break;
+
+ default:
+ mask_write(cp, dst, result, reg->DstRegister.WriteMask);
+ break;
+ }
+
+ aos_adopt_xmm_reg(cp,
+ dst,
+ reg->DstRegister.File,
+ reg->DstRegister.Index,
+ TRUE);
+
+}
+
+static void inject_scalar( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg result,
+ ubyte swizzle )
+{
+ sse_shufps(cp->func, dst, dst, swizzle);
+ sse_movss(cp->func, dst, result);
+ sse_shufps(cp->func, dst, dst, swizzle);
+}
+
+
+static void store_scalar_dest( struct aos_compilation *cp,
+ const struct tgsi_full_dst_register *reg,
+ struct x86_reg result )
+{
+ unsigned writemask = reg->DstRegister.WriteMask;
+ struct x86_reg dst;
+
+ if (writemask != TGSI_WRITEMASK_X &&
+ writemask != TGSI_WRITEMASK_Y &&
+ writemask != TGSI_WRITEMASK_Z &&
+ writemask != TGSI_WRITEMASK_W &&
+ writemask != 0)
+ {
+ result = get_xmm_writable(cp, result); /* already true, right? */
+ sse_shufps(cp->func, result, result, SHUF(X,X,X,X));
+ store_dest(cp, reg, result);
+ return;
+ }
+
+ result = get_xmm(cp, result);
+ dst = aos_get_shader_reg_xmm(cp,
+ reg->DstRegister.File,
+ reg->DstRegister.Index);
+
+
+
+ switch (reg->DstRegister.WriteMask) {
+ case TGSI_WRITEMASK_X:
+ sse_movss(cp->func, dst, result);
+ break;
+
+ case TGSI_WRITEMASK_Y:
+ inject_scalar(cp, dst, result, SHUF(Y, X, Z, W));
+ break;
+
+ case TGSI_WRITEMASK_Z:
+ inject_scalar(cp, dst, result, SHUF(Z, Y, X, W));
+ break;
+
+ case TGSI_WRITEMASK_W:
+ inject_scalar(cp, dst, result, SHUF(W, Y, Z, X));
+ break;
+
+ default:
+ break;
+ }
+
+ aos_adopt_xmm_reg(cp,
+ dst,
+ reg->DstRegister.File,
+ reg->DstRegister.Index,
+ TRUE);
+}
+
+
+
+static void x87_fst_or_nop( struct x86_function *func,
+ unsigned writemask,
+ unsigned channel,
+ struct x86_reg ptr )
+{
+ assert(ptr.file == file_REG32);
+ if (writemask & (1<<channel))
+ x87_fst( func, x86_make_disp(ptr, channel * sizeof(float)) );
+}
+
+static void x87_fstp_or_pop( struct x86_function *func,
+ unsigned writemask,
+ unsigned channel,
+ struct x86_reg ptr )
+{
+ assert(ptr.file == file_REG32);
+ if (writemask & (1<<channel))
+ x87_fstp( func, x86_make_disp(ptr, channel * sizeof(float)) );
+ else
+ x87_fstp( func, x86_make_reg( file_x87, 0 ));
+}
+
+
+
+/*
+ */
+static void x87_fstp_dest4( struct aos_compilation *cp,
+ const struct tgsi_full_dst_register *dst )
+{
+ struct x86_reg ptr = get_dst_ptr(cp, dst);
+ unsigned writemask = dst->DstRegister.WriteMask;
+
+ x87_fst_or_nop(cp->func, writemask, 0, ptr);
+ x87_fst_or_nop(cp->func, writemask, 1, ptr);
+ x87_fst_or_nop(cp->func, writemask, 2, ptr);
+ x87_fstp_or_pop(cp->func, writemask, 3, ptr);
+}
+
+/* Save current x87 state and put it into single precision mode.
+ */
+static void save_fpu_state( struct aos_compilation *cp )
+{
+ x87_fnstcw( cp->func, x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, fpu_restore)));
+}
+
+static void restore_fpu_state( struct aos_compilation *cp )
+{
+ x87_fnclex(cp->func);
+ x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, fpu_restore)));
+}
+
+static void set_fpu_round_neg_inf( struct aos_compilation *cp )
+{
+ if (cp->fpucntl != FPU_RND_NEG) {
+ cp->fpucntl = FPU_RND_NEG;
+ x87_fnclex(cp->func);
+ x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, fpu_rnd_neg_inf)));
+ }
+}
+
+static void set_fpu_round_nearest( struct aos_compilation *cp )
+{
+ if (cp->fpucntl != FPU_RND_NEAREST) {
+ cp->fpucntl = FPU_RND_NEAREST;
+ x87_fnclex(cp->func);
+ x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, fpu_rnd_nearest)));
+ }
+}
+
+#if 0
+static void x87_emit_ex2( struct aos_compilation *cp )
+{
+ struct x86_reg st0 = x86_make_reg(file_x87, 0);
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ int stack = cp->func->x87_stack;
+
+// set_fpu_round_neg_inf( cp );
+
+ x87_fld(cp->func, st0); /* a a */
+ x87_fprndint( cp->func ); /* int(a) a*/
+ x87_fsubr(cp->func, st1, st0); /* int(a) frc(a) */
+ x87_fxch(cp->func, st1); /* frc(a) int(a) */
+ x87_f2xm1(cp->func); /* (2^frc(a))-1 int(a) */
+ x87_fld1(cp->func); /* 1 (2^frc(a))-1 int(a) */
+ x87_faddp(cp->func, st1); /* 2^frac(a) int(a) */
+ x87_fscale(cp->func); /* (2^frac(a)*2^int(int(a))) int(a) */
+ /* 2^a int(a) */
+ x87_fstp(cp->func, st1); /* 2^a */
+
+ assert( stack == cp->func->x87_stack);
+
+}
+#endif
+
+#if 0
+static void PIPE_CDECL print_reg( const char *msg,
+ const float *reg )
+{
+ debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]);
+}
+#endif
+
+#if 0
+static void emit_print( struct aos_compilation *cp,
+ const char *message, /* must point to a static string! */
+ unsigned file,
+ unsigned idx )
+{
+ struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+ struct x86_reg arg = aos_get_shader_reg_ptr( cp, file, idx );
+ unsigned i;
+
+ /* There shouldn't be anything on the x87 stack. Can add this
+ * capacity later if need be.
+ */
+ assert(cp->func->x87_stack == 0);
+
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too. We're obviously not concerned about performance on this
+ * debug path, so here goes:
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+
+ aos_release_xmm_reg(cp, i);
+ }
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+
+ /* Push the arguments:
+ */
+ x86_lea( cp->func, ecx, arg );
+ x86_push( cp->func, ecx );
+ x86_push_imm32( cp->func, (int)message );
+
+ /* Call the helper. Could call debug_printf directly, but
+ * print_reg is a nice place to put a breakpoint if need be.
+ */
+ x86_mov_reg_imm( cp->func, ecx, (int)print_reg );
+ x86_call( cp->func, ecx );
+ x86_pop( cp->func, ecx );
+ x86_pop( cp->func, ecx );
+
+ /* Pop caller-save regs
+ */
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ /* Done...
+ */
+}
+#endif
+
+/**
+ * The traditional instructions. All operate on internal registers
+ * and ignore write masks and swizzling issues.
+ */
+
+static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movaps(cp->func, tmp, arg0);
+ sse_mulps(cp->func, tmp, neg);
+ sse_maxps(cp->func, tmp, arg0);
+
+ store_dest(cp, &op->FullDstRegisters[0], tmp);
+ return TRUE;
+}
+
+static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_addps(cp->func, dst, arg1);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+ x87_fcos(cp->func);
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+}
+
+/* The dotproduct instructions don't really do that well in sse:
+ * XXX: produces wrong results -- disabled.
+ */
+static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_mulps(cp->func, dst, arg1);
+ /* Now the hard bit: sum the first 3 values:
+ */
+ sse_movhlps(cp->func, tmp, dst);
+ sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */
+ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
+ sse_addss(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_mulps(cp->func, dst, arg1);
+
+ /* Now the hard bit: sum the values:
+ */
+ sse_movhlps(cp->func, tmp, dst);
+ sse_addps(cp->func, dst, tmp); /* a*x+c*z, b*y+d*w, a*x+c*z, b*y+d*w */
+ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
+ sse_addss(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_mulps(cp->func, dst, arg1);
+
+ /* Now the hard bit: sum the values (from DP3):
+ */
+ sse_movhlps(cp->func, tmp, dst);
+ sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */
+ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
+ sse_addss(cp->func, dst, tmp);
+ emit_pshufd(cp, tmp, arg1, SHUF(W,W,W,W));
+ sse_addss(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+
+/* dst[0] = 1.0 * 1.0F; */
+/* dst[1] = arg0[1] * arg1[1]; */
+/* dst[2] = arg0[2] * 1.0; */
+/* dst[3] = 1.0 * arg1[3]; */
+
+ emit_shuf_copy2(cp, dst, arg0, ones, SHUF(X,W,Z,Y));
+ emit_shuf_copy2(cp, tmp, arg1, ones, SHUF(X,Z,Y,W));
+ sse_mulps(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ x87_fld1(cp->func); /* 1 */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 1 */
+ x87_fyl2x(cp->func); /* log2(a0) */
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+}
+
+#if 0
+static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+ x87_emit_ex2(cp);
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+}
+#endif
+
+
+static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+
+ set_fpu_round_neg_inf( cp );
+
+ /* Load all sources first to avoid aliasing
+ */
+ for (i = 3; i >= 0; i--) {
+ if (writemask & (1<<i)) {
+ x87_fld_src(cp, &op->FullSrcRegisters[0], i);
+ }
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ x87_fprndint( cp->func );
+ x87_fstp(cp->func, x86_make_disp(dst, i*4));
+ }
+ }
+
+ return TRUE;
+}
+
+
+static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+
+ set_fpu_round_nearest( cp );
+
+ /* Load all sources first to avoid aliasing
+ */
+ for (i = 3; i >= 0; i--) {
+ if (writemask & (1<<i)) {
+ x87_fld_src(cp, &op->FullSrcRegisters[0], i);
+ }
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ x87_fprndint( cp->func );
+ x87_fstp(cp->func, x86_make_disp(dst, i*4));
+ }
+ }
+
+ return TRUE;
+}
+
+
+static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ struct x86_reg st0 = x86_make_reg(file_x87, 0);
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+
+ set_fpu_round_neg_inf( cp );
+
+ /* suck all the source values onto the stack before writing out any
+ * dst, which may alias...
+ */
+ for (i = 3; i >= 0; i--) {
+ if (writemask & (1<<i)) {
+ x87_fld_src(cp, &op->FullSrcRegisters[0], i);
+ }
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ x87_fld(cp->func, st0); /* a a */
+ x87_fprndint( cp->func ); /* flr(a) a */
+ x87_fsubp(cp->func, st1); /* frc(a) */
+ x87_fstp(cp->func, x86_make_disp(dst, i*4));
+ }
+ }
+
+ return TRUE;
+}
+
+
+
+
+
+
+static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ unsigned lit_count = cp->lit_count++;
+ struct x86_reg result, arg0;
+ unsigned i;
+
+#if 1
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+#endif
+
+ if (writemask != TGSI_WRITEMASK_XYZW)
+ result = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[0]));
+ else
+ result = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+
+
+ arg0 = fetch_src( cp, &op->FullSrcRegisters[0] );
+ if (arg0.file == file_XMM) {
+ struct x86_reg tmp = x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, tmp[1]));
+ sse_movaps( cp->func, tmp, arg0 );
+ arg0 = tmp;
+ }
+
+
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+ /* Push the arguments:
+ */
+ x86_push_imm32( cp->func, lit_count );
+
+ x86_lea( cp->func, ecx, arg0 );
+ x86_push( cp->func, ecx );
+
+ x86_lea( cp->func, ecx, result );
+ x86_push( cp->func, ecx );
+
+ x86_push( cp->func, cp->machine_EDX );
+
+ if (lit_count < MAX_LIT_INFO) {
+ x86_mov( cp->func, ecx, x86_make_disp( cp->machine_EDX,
+ Offset(struct aos_machine, lit_info) +
+ lit_count * sizeof(struct lit_info) +
+ Offset(struct lit_info, func)));
+ }
+ else {
+ x86_mov_reg_imm( cp->func, ecx, (int)aos_do_lit );
+ }
+
+ x86_call( cp->func, ecx );
+
+ x86_pop( cp->func, ecx ); /* fixme... */
+ x86_pop( cp->func, ecx );
+ x86_pop( cp->func, ecx );
+ x86_pop( cp->func, ecx );
+
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ if (writemask != TGSI_WRITEMASK_XYZW) {
+ store_dest( cp,
+ &op->FullDstRegisters[0],
+ get_xmm_writable( cp, result ) );
+ }
+
+ return TRUE;
+}
+
+#if 0
+static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+
+ if (writemask & TGSI_WRITEMASK_YZ) {
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ struct x86_reg st2 = x86_make_reg(file_x87, 2);
+
+ /* a1' = a1 <= 0 ? 1 : a1;
+ */
+ x87_fldz(cp->func); /* 1 0 */
+#if 1
+ x87_fld1(cp->func); /* 1 0 */
+#else
+ /* Correct but slow due to fp exceptions generated in fyl2x - fix me.
+ */
+ x87_fldz(cp->func); /* 1 0 */
+#endif
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */
+ x87_fcomi(cp->func, st2); /* a1 1 0 */
+ x87_fcmovb(cp->func, st1); /* a1' 1 0 */
+ x87_fstp(cp->func, st1); /* a1' 0 */
+ x87_fstp(cp->func, st1); /* a1' */
+
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 3); /* a3 a1' */
+ x87_fxch(cp->func, st1); /* a1' a3 */
+
+
+ /* Compute pow(a1, a3)
+ */
+ x87_fyl2x(cp->func); /* a3*log2(a1) */
+ x87_emit_ex2( cp ); /* 2^(a3*log2(a1)) */
+
+
+ /* a0' = max2(a0, 0):
+ */
+ x87_fldz(cp->func); /* 0 r2 */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */
+ x87_fcomi(cp->func, st1);
+ x87_fcmovb(cp->func, st1); /* a0' 0 r2 */
+
+ x87_fst_or_nop(cp->func, writemask, 1, dst); /* result[1] = a0' */
+
+ x87_fcomi(cp->func, st1); /* a0' 0 r2 */
+ x87_fcmovnbe(cp->func, st2); /* r2' 0' r2 */
+
+ x87_fstp_or_pop(cp->func, writemask, 2, dst); /* 0 r2 */
+ x87_fpop(cp->func); /* r2 */
+ x87_fpop(cp->func);
+ }
+
+ if (writemask & TGSI_WRITEMASK_XW) {
+ x87_fld1(cp->func);
+ x87_fst_or_nop(cp->func, writemask, 0, dst);
+ x87_fstp_or_pop(cp->func, writemask, 3, dst);
+ }
+
+ return TRUE;
+}
+#endif
+
+
+
+static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_maxps(cp->func, dst, arg1);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+
+static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_minps(cp->func, dst, arg1);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ /* potentially nothing to do */
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_mulps(cp->func, dst, arg1);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+
+static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg arg2 = fetch_src(cp, &op->FullSrcRegisters[2]);
+
+ /* If we can't clobber old contents of arg0, get a temporary & copy
+ * it there, then clobber it...
+ */
+ arg0 = get_xmm_writable(cp, arg0);
+
+ sse_mulps(cp->func, arg0, arg1);
+ sse_addps(cp->func, arg0, arg2);
+ store_dest(cp, &op->FullDstRegisters[0], arg0);
+ return TRUE;
+}
+
+
+
+/* A wrapper for powf().
+ * Makes sure it is cdecl and operates on floats.
+ */
+static float PIPE_CDECL _powerf( float x, float y )
+{
+#if FAST_MATH
+ return util_fast_pow(x, y);
+#else
+ return powf( x, y );
+#endif
+}
+
+#if FAST_MATH
+static float PIPE_CDECL _exp2(float x)
+{
+ return util_fast_exp2(x);
+}
+#endif
+
+
+/* Really not sufficient -- need to check for conditions that could
+ * generate inf/nan values, which will slow things down hugely.
+ */
+static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+#if 0
+ x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */
+ x87_fyl2x(cp->func); /* a1*log2(a0) */
+
+ x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */
+
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+#else
+ uint i;
+
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) );
+
+ x87_fld_src( cp, &op->FullSrcRegisters[1], 0 );
+ x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) );
+ x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
+ x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) );
+
+ /* tmp_EAX has been pushed & will be restored below */
+ x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _powerf );
+ x86_call( cp->func, cp->tmp_EAX );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 8) );
+
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ /* Note retval on x87 stack:
+ */
+ cp->func->x87_stack++;
+
+ x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
+#endif
+ return TRUE;
+}
+
+
+#if FAST_MATH
+static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ uint i;
+
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) );
+
+ x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
+ x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) );
+
+ /* tmp_EAX has been pushed & will be restored below */
+ x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _exp2 );
+ x86_call( cp->func, cp->tmp_EAX );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 4) );
+
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ /* Note retval on x87 stack:
+ */
+ cp->func->x87_stack++;
+
+ x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
+
+ return TRUE;
+}
+#endif
+
+
+static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+
+ if (cp->have_sse2) {
+ sse2_rcpss(cp->func, dst, arg0);
+ /* extend precision here...
+ */
+ }
+ else {
+ struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+ sse_movss(cp->func, dst, ones);
+ sse_divss(cp->func, dst, arg0);
+ }
+
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+
+/* Although rsqrtps() and rcpps() are low precision on some/all SSE
+ * implementations, it is possible to improve its precision at
+ * fairly low cost, using a newton/raphson step, as below:
+ *
+ * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
+ * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
+ * or:
+ * x1 = rsqrtps(a) * [1.5 - .5 * a * rsqrtps(a) * rsqrtps(a)]
+ *
+ *
+ * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ if (0) {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg r = aos_get_xmm_reg(cp);
+ sse_rsqrtss(cp->func, r, arg0);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+ return TRUE;
+ }
+ else {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg r = aos_get_xmm_reg(cp);
+
+ struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ );
+ struct x86_reg one_point_five = x86_make_disp( neg_half, 4 );
+ struct x86_reg src = get_xmm_writable( cp, arg0 );
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movaps(cp->func, tmp, src);
+ sse_mulps(cp->func, tmp, neg);
+ sse_maxps(cp->func, tmp, src);
+
+ sse_rsqrtss( cp->func, r, tmp ); /* rsqrtss(a) */
+ sse_mulss( cp->func, tmp, neg_half ); /* -.5 * a */
+ sse_mulss( cp->func, tmp, r ); /* -.5 * a * r */
+ sse_mulss( cp->func, tmp, r ); /* -.5 * a * r * r */
+ sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */
+ sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */
+
+ store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+
+ return TRUE;
+ }
+}
+
+
+static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_cmpps(cp->func, dst, arg1, cc_NotLessThan);
+ sse_andps(cp->func, dst, ones);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+ x87_fsin(cp->func);
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+}
+
+
+
+static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_cmpps(cp->func, dst, arg1, cc_LessThan);
+ sse_andps(cp->func, dst, ones);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_subps(cp->func, dst, arg1);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+static boolean emit_TRUNC( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg tmp0 = aos_get_xmm_reg(cp);
+
+ sse2_cvttps2dq(cp->func, tmp0, arg0);
+ sse2_cvtdq2ps(cp->func, tmp0, tmp0);
+
+ store_dest(cp, &op->FullDstRegisters[0], tmp0);
+ return TRUE;
+}
+
+static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg tmp0 = aos_get_xmm_reg(cp);
+ struct x86_reg tmp1 = aos_get_xmm_reg(cp);
+
+ emit_pshufd(cp, tmp1, arg1, SHUF(Y, Z, X, W));
+ sse_mulps(cp->func, tmp1, arg0);
+ emit_pshufd(cp, tmp0, arg0, SHUF(Y, Z, X, W));
+ sse_mulps(cp->func, tmp0, arg1);
+ sse_subps(cp->func, tmp1, tmp0);
+ sse_shufps(cp->func, tmp1, tmp1, SHUF(Y, Z, X, W));
+
+/* dst[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]; */
+/* dst[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1]; */
+/* dst[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2]; */
+/* dst[3] is undef */
+
+
+ aos_release_xmm_reg(cp, tmp0.idx);
+ store_dest(cp, &op->FullDstRegisters[0], tmp1);
+ return TRUE;
+}
+
+
+
+static boolean
+emit_instruction( struct aos_compilation *cp,
+ struct tgsi_full_instruction *inst )
+{
+ x87_assert_stack_empty(cp->func);
+
+ switch( inst->Instruction.Opcode ) {
+ case TGSI_OPCODE_MOV:
+ return emit_MOV( cp, inst );
+
+ case TGSI_OPCODE_LIT:
+ return emit_LIT(cp, inst);
+
+ case TGSI_OPCODE_RCP:
+ return emit_RCP(cp, inst);
+
+ case TGSI_OPCODE_RSQ:
+ return emit_RSQ(cp, inst);
+
+ case TGSI_OPCODE_EXP:
+ /*return emit_EXP(cp, inst);*/
+ return FALSE;
+
+ case TGSI_OPCODE_LOG:
+ /*return emit_LOG(cp, inst);*/
+ return FALSE;
+
+ case TGSI_OPCODE_MUL:
+ return emit_MUL(cp, inst);
+
+ case TGSI_OPCODE_ADD:
+ return emit_ADD(cp, inst);
+
+ case TGSI_OPCODE_DP3:
+ return emit_DP3(cp, inst);
+
+ case TGSI_OPCODE_DP4:
+ return emit_DP4(cp, inst);
+
+ case TGSI_OPCODE_DST:
+ return emit_DST(cp, inst);
+
+ case TGSI_OPCODE_MIN:
+ return emit_MIN(cp, inst);
+
+ case TGSI_OPCODE_MAX:
+ return emit_MAX(cp, inst);
+
+ case TGSI_OPCODE_SLT:
+ return emit_SLT(cp, inst);
+
+ case TGSI_OPCODE_SGE:
+ return emit_SGE(cp, inst);
+
+ case TGSI_OPCODE_MAD:
+ return emit_MAD(cp, inst);
+
+ case TGSI_OPCODE_SUB:
+ return emit_SUB(cp, inst);
+
+ case TGSI_OPCODE_LRP:
+// return emit_LERP(cp, inst);
+ return FALSE;
+
+ case TGSI_OPCODE_FRC:
+ return emit_FRC(cp, inst);
+
+ case TGSI_OPCODE_CLAMP:
+// return emit_CLAMP(cp, inst);
+ return FALSE;
+
+ case TGSI_OPCODE_FLR:
+ return emit_FLR(cp, inst);
+
+ case TGSI_OPCODE_ROUND:
+ return emit_RND(cp, inst);
+
+ case TGSI_OPCODE_EX2:
+#if FAST_MATH
+ return emit_EXPBASE2(cp, inst);
+#elif 0
+ /* this seems to fail for "larger" exponents.
+ * See glean tvertProg1's EX2 test.
+ */
+ return emit_EX2(cp, inst);
+#else
+ return FALSE;
+#endif
+
+ case TGSI_OPCODE_LG2:
+ return emit_LG2(cp, inst);
+
+ case TGSI_OPCODE_POW:
+ return emit_POW(cp, inst);
+
+ case TGSI_OPCODE_XPD:
+ return emit_XPD(cp, inst);
+
+ case TGSI_OPCODE_ABS:
+ return emit_ABS(cp, inst);
+
+ case TGSI_OPCODE_DPH:
+ return emit_DPH(cp, inst);
+
+ case TGSI_OPCODE_COS:
+ return emit_COS(cp, inst);
+
+ case TGSI_OPCODE_SIN:
+ return emit_SIN(cp, inst);
+
+ case TGSI_OPCODE_TRUNC:
+ return emit_TRUNC(cp, inst);
+
+ case TGSI_OPCODE_END:
+ return TRUE;
+
+ default:
+ return FALSE;
+ }
+}
+
+
+static boolean emit_viewport( struct aos_compilation *cp )
+{
+ struct x86_reg pos = aos_get_shader_reg_xmm(cp,
+ TGSI_FILE_OUTPUT,
+ cp->vaos->draw->vs.position_output );
+
+ struct x86_reg scale = x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, scale));
+
+ struct x86_reg translate = x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, translate));
+
+ sse_mulps(cp->func, pos, scale);
+ sse_addps(cp->func, pos, translate);
+
+ aos_adopt_xmm_reg( cp,
+ pos,
+ TGSI_FILE_OUTPUT,
+ cp->vaos->draw->vs.position_output,
+ TRUE );
+ return TRUE;
+}
+
+
+/* This is useful to be able to see the results on softpipe. Doesn't
+ * do proper clipping, just assumes the backend can do it during
+ * rasterization -- for debug only...
+ */
+static boolean emit_rhw_viewport( struct aos_compilation *cp )
+{
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg pos = aos_get_shader_reg_xmm(cp,
+ TGSI_FILE_OUTPUT,
+ cp->vaos->draw->vs.position_output);
+
+ struct x86_reg scale = x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, scale));
+
+ struct x86_reg translate = x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, translate));
+
+
+
+ emit_pshufd(cp, tmp, pos, SHUF(W, W, W, W));
+ sse2_rcpss(cp->func, tmp, tmp);
+ sse_shufps(cp->func, tmp, tmp, SHUF(X, X, X, X));
+
+ sse_mulps(cp->func, pos, scale);
+ sse_mulps(cp->func, pos, tmp);
+ sse_addps(cp->func, pos, translate);
+
+ /* Set pos[3] = w
+ */
+ mask_write(cp, pos, tmp, TGSI_WRITEMASK_W);
+
+ aos_adopt_xmm_reg( cp,
+ pos,
+ TGSI_FILE_OUTPUT,
+ cp->vaos->draw->vs.position_output,
+ TRUE );
+ return TRUE;
+}
+
+
+#if 0
+static boolean note_immediate( struct aos_compilation *cp,
+ struct tgsi_full_immediate *imm )
+{
+ unsigned pos = cp->num_immediates++;
+ unsigned j;
+
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
+ for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
+ cp->vaos->machine->immediate[pos][j] = imm->u[j].Float;
+ }
+
+ return TRUE;
+}
+#endif
+
+
+
+
+static void find_last_write_outputs( struct aos_compilation *cp )
+{
+ struct tgsi_parse_context parse;
+ unsigned this_instruction = 0;
+ unsigned i;
+
+ tgsi_parse_init( &parse, cp->vaos->base.vs->state.tokens );
+
+ while (!tgsi_parse_end_of_tokens( &parse )) {
+
+ tgsi_parse_token( &parse );
+
+ if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+ continue;
+
+ for (i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++) {
+ if (parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.File ==
+ TGSI_FILE_OUTPUT)
+ {
+ unsigned idx = parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.Index;
+ cp->output_last_write[idx] = this_instruction;
+ }
+ }
+
+ this_instruction++;
+ }
+
+ tgsi_parse_free( &parse );
+}
+
+
+#define ARG_MACHINE 1
+#define ARG_START_ELTS 2
+#define ARG_COUNT 3
+#define ARG_OUTBUF 4
+
+
+static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
+ boolean linear )
+{
+ struct tgsi_parse_context parse;
+ struct aos_compilation cp;
+ unsigned fixup, label;
+
+ util_init_math();
+
+ tgsi_parse_init( &parse, varient->base.vs->state.tokens );
+
+ memset(&cp, 0, sizeof(cp));
+
+ cp.insn_counter = 1;
+ cp.vaos = varient;
+ cp.have_sse2 = 1;
+ cp.func = &varient->func[ linear ? 0 : 1 ];
+
+ cp.tmp_EAX = x86_make_reg(file_REG32, reg_AX);
+ cp.idx_EBX = x86_make_reg(file_REG32, reg_BX);
+ cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
+ cp.machine_EDX = x86_make_reg(file_REG32, reg_DX);
+ cp.count_ESI = x86_make_reg(file_REG32, reg_SI);
+ cp.temp_EBP = x86_make_reg(file_REG32, reg_BP);
+ cp.stack_ESP = x86_make_reg( file_REG32, reg_SP );
+
+ x86_init_func(cp.func);
+
+ find_last_write_outputs(&cp);
+
+ x86_push(cp.func, cp.idx_EBX);
+ x86_push(cp.func, cp.count_ESI);
+ x86_push(cp.func, cp.temp_EBP);
+
+
+ /* Load arguments into regs:
+ */
+ x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_MACHINE));
+ x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS));
+ x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT));
+ x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF));
+
+
+ /* Compare count to zero and possibly bail.
+ */
+ x86_xor(cp.func, cp.tmp_EAX, cp.tmp_EAX);
+ x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX);
+ fixup = x86_jcc_forward(cp.func, cc_E);
+
+
+ save_fpu_state( &cp );
+ set_fpu_round_nearest( &cp );
+
+ aos_init_inputs( &cp, linear );
+
+ cp.x86_reg[0] = 0;
+ cp.x86_reg[1] = 0;
+
+ /* Note address for loop jump
+ */
+ label = x86_get_label(cp.func);
+ {
+ /* Fetch inputs... TODO: fetch lazily...
+ */
+ if (!aos_fetch_inputs( &cp, linear ))
+ goto fail;
+
+ /* Emit the shader:
+ */
+ while( !tgsi_parse_end_of_tokens( &parse ) && !cp.error )
+ {
+ tgsi_parse_token( &parse );
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+#if 0
+ if (!note_immediate( &cp, &parse.FullToken.FullImmediate ))
+ goto fail;
+#endif
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (DISASSEM)
+ tgsi_dump_instruction( &parse.FullToken.FullInstruction, cp.insn_counter );
+
+ if (!emit_instruction( &cp, &parse.FullToken.FullInstruction ))
+ goto fail;
+ break;
+ }
+
+ x87_assert_stack_empty(cp.func);
+ cp.insn_counter++;
+
+ if (DISASSEM)
+ debug_printf("\n");
+ }
+
+
+ {
+ unsigned i;
+ for (i = 0; i < 8; i++) {
+ if (cp.xmm[i].file != TGSI_FILE_OUTPUT) {
+ cp.xmm[i].file = TGSI_FILE_NULL;
+ cp.xmm[i].dirty = 0;
+ }
+ }
+ }
+
+ if (cp.error)
+ goto fail;
+
+ if (cp.vaos->base.key.clip) {
+ /* not really handling clipping, just do the rhw so we can
+ * see the results...
+ */
+ emit_rhw_viewport(&cp);
+ }
+ else if (cp.vaos->base.key.viewport) {
+ emit_viewport(&cp);
+ }
+
+ /* Emit output... TODO: do this eagerly after the last write to a
+ * given output.
+ */
+ if (!aos_emit_outputs( &cp ))
+ goto fail;
+
+
+ /* Next vertex:
+ */
+ x86_lea(cp.func,
+ cp.outbuf_ECX,
+ x86_make_disp(cp.outbuf_ECX,
+ cp.vaos->base.key.output_stride));
+
+ /* Incr index
+ */
+ aos_incr_inputs( &cp, linear );
+ }
+ /* decr count, loop if not zero
+ */
+ x86_dec(cp.func, cp.count_ESI);
+ x86_jcc(cp.func, cc_NZ, label);
+
+ restore_fpu_state(&cp);
+
+ /* Land forward jump here:
+ */
+ x86_fixup_fwd_jump(cp.func, fixup);
+
+ /* Exit mmx state?
+ */
+ if (cp.func->need_emms)
+ mmx_emms(cp.func);
+
+ x86_pop(cp.func, cp.temp_EBP);
+ x86_pop(cp.func, cp.count_ESI);
+ x86_pop(cp.func, cp.idx_EBX);
+
+ x87_assert_stack_empty(cp.func);
+ x86_ret(cp.func);
+
+ tgsi_parse_free( &parse );
+ return !cp.error;
+
+ fail:
+ tgsi_parse_free( &parse );
+ return FALSE;
+}
+
+
+
+static void vaos_set_buffer( struct draw_vs_varient *varient,
+ unsigned buf,
+ const void *ptr,
+ unsigned stride )
+{
+ struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+
+ if (buf < vaos->nr_vb) {
+ vaos->buffer[buf].base_ptr = (char *)ptr;
+ vaos->buffer[buf].stride = stride;
+ }
+
+ if (0) debug_printf("%s %d/%d: %p %d\n", __FUNCTION__, buf, vaos->nr_vb, ptr, stride);
+}
+
+
+
+static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer )
+{
+ struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct aos_machine *machine = vaos->draw->vs.aos_machine;
+
+ if (0) debug_printf("%s %d\n", __FUNCTION__, count);
+
+ machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+ machine->constants = vaos->draw->vs.aligned_constants;
+ machine->immediates = vaos->base.vs->immediates;
+ machine->buffer = vaos->buffer;
+
+ vaos->gen_run_elts( machine,
+ elts,
+ count,
+ output_buffer );
+}
+
+static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
+ unsigned start,
+ unsigned count,
+ void *output_buffer )
+{
+ struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct aos_machine *machine = vaos->draw->vs.aos_machine;
+
+ if (0) debug_printf("%s %d %d const: %x\n", __FUNCTION__, start, count,
+ vaos->base.key.const_vbuffers);
+
+ machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+ machine->constants = vaos->draw->vs.aligned_constants;
+ machine->immediates = vaos->base.vs->immediates;
+ machine->buffer = vaos->buffer;
+
+ vaos->gen_run_linear( machine,
+ start,
+ count,
+ output_buffer );
+
+ /* Sanity spot checks to make sure we didn't trash our constants */
+ assert(machine->internal[IMM_ONES][0] == 1.0f);
+ assert(machine->internal[IMM_IDENTITY][0] == 0.0f);
+ assert(machine->internal[IMM_NEGS][0] == -1.0f);
+}
+
+
+
+static void vaos_destroy( struct draw_vs_varient *varient )
+{
+ struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+
+ FREE( vaos->buffer );
+
+ x86_release_func( &vaos->func[0] );
+ x86_release_func( &vaos->func[1] );
+
+ FREE(vaos);
+}
+
+
+
+static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key )
+{
+ unsigned i;
+ struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse);
+
+ if (!vaos)
+ goto fail;
+
+ vaos->base.key = *key;
+ vaos->base.vs = vs;
+ vaos->base.set_buffer = vaos_set_buffer;
+ vaos->base.destroy = vaos_destroy;
+ vaos->base.run_linear = vaos_run_linear;
+ vaos->base.run_elts = vaos_run_elts;
+
+ vaos->draw = vs->draw;
+
+ for (i = 0; i < key->nr_inputs; i++)
+ vaos->nr_vb = MAX2( vaos->nr_vb, key->element[i].in.buffer + 1 );
+
+ vaos->buffer = MALLOC( vaos->nr_vb * sizeof(vaos->buffer[0]) );
+ if (!vaos->buffer)
+ goto fail;
+
+ if (0)
+ debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers);
+
+#if 0
+ tgsi_dump(vs->state.tokens, 0);
+#endif
+
+ if (!build_vertex_program( vaos, TRUE ))
+ goto fail;
+
+ if (!build_vertex_program( vaos, FALSE ))
+ goto fail;
+
+ vaos->gen_run_linear = (vaos_run_linear_func)x86_get_func(&vaos->func[0]);
+ if (!vaos->gen_run_linear)
+ goto fail;
+
+ vaos->gen_run_elts = (vaos_run_elts_func)x86_get_func(&vaos->func[1]);
+ if (!vaos->gen_run_elts)
+ goto fail;
+
+ return &vaos->base;
+
+ fail:
+ if (vaos && vaos->buffer)
+ FREE(vaos->buffer);
+
+ if (vaos)
+ x86_release_func( &vaos->func[0] );
+
+ if (vaos)
+ x86_release_func( &vaos->func[1] );
+
+ FREE(vaos);
+
+ return NULL;
+}
+
+
+struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key )
+{
+ struct draw_vs_varient *varient = varient_aos_sse( vs, key );
+
+ if (varient == NULL) {
+ varient = draw_vs_varient_generic( vs, key );
+ }
+
+ return varient;
+}
+
+
+
+#endif /* PIPE_ARCH_X86 */
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
new file mode 100644
index 0000000000..2cf72ddf7b
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -0,0 +1,253 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef DRAW_VS_AOS_H
+#define DRAW_VS_AOS_H
+
+#include "pipe/p_config.h"
+
+#ifdef PIPE_ARCH_X86
+
+struct tgsi_token;
+struct x86_function;
+
+#include "pipe/p_state.h"
+#include "rtasm/rtasm_x86sse.h"
+
+
+
+
+
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+#define MAX_INPUTS PIPE_MAX_ATTRIBS
+#define MAX_OUTPUTS PIPE_MAX_SHADER_OUTPUTS
+#define MAX_TEMPS TGSI_EXEC_NUM_TEMPS
+#define MAX_CONSTANTS 1024 /** only used for sanity checking */
+#define MAX_IMMEDIATES 1024 /** only used for sanity checking */
+#define MAX_INTERNALS 8 /** see IMM_x values below */
+
+#define AOS_FILE_INTERNAL TGSI_FILE_COUNT
+
+#define FPU_RND_NEG 1
+#define FPU_RND_NEAREST 2
+
+struct aos_machine;
+typedef void (PIPE_CDECL *lit_func)( struct aos_machine *,
+ float *result,
+ const float *in,
+ unsigned count );
+
+void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count );
+
+struct shine_tab {
+ float exponent;
+ float values[258];
+ unsigned last_used;
+};
+
+struct lit_info {
+ lit_func func;
+ struct shine_tab *shine_tab;
+};
+
+#define MAX_SHINE_TAB 4
+#define MAX_LIT_INFO 16
+
+struct aos_buffer {
+ const void *base_ptr;
+ unsigned stride;
+ void *ptr; /* updated per vertex */
+};
+
+
+
+
+/* This is the temporary storage used by all the aos_sse vs varients.
+ * Create one per context and reuse by passing a pointer in at
+ * vs_varient creation??
+ */
+struct aos_machine {
+ float input [MAX_INPUTS ][4];
+ float output [MAX_OUTPUTS ][4];
+ float temp [MAX_TEMPS ][4];
+ float internal [MAX_INTERNALS ][4];
+
+ float scale[4]; /* viewport */
+ float translate[4]; /* viewport */
+
+ float tmp[2][4]; /* scratch space for LIT */
+
+ struct shine_tab shine_tab[MAX_SHINE_TAB];
+ struct lit_info lit_info[MAX_LIT_INFO];
+ unsigned now;
+
+
+ ushort fpu_rnd_nearest;
+ ushort fpu_rnd_neg_inf;
+ ushort fpu_restore;
+ ushort fpucntl; /* one of FPU_* above */
+
+ const float (*immediates)[4]; /* points to shader data */
+ const float (*constants)[4]; /* points to draw data */
+
+ const struct aos_buffer *buffer; /* points to ? */
+};
+
+
+
+
+struct aos_compilation {
+ struct x86_function *func;
+ struct draw_vs_varient_aos_sse *vaos;
+
+ unsigned insn_counter;
+ unsigned num_immediates;
+ unsigned count;
+ unsigned lit_count;
+
+ struct {
+ unsigned idx:16;
+ unsigned file:8;
+ unsigned dirty:8;
+ unsigned last_used;
+ } xmm[8];
+
+ unsigned x86_reg[2]; /* one of X86_* */
+
+ boolean input_fetched[PIPE_MAX_ATTRIBS];
+ unsigned output_last_write[PIPE_MAX_ATTRIBS];
+
+ boolean have_sse2;
+ boolean error;
+ short fpucntl;
+
+ /* these are actually known values, but putting them in a struct
+ * like this is helpful to keep them in sync across the file.
+ */
+ struct x86_reg tmp_EAX;
+ struct x86_reg idx_EBX; /* either start+i or &elt[i] */
+ struct x86_reg outbuf_ECX;
+ struct x86_reg machine_EDX;
+ struct x86_reg count_ESI; /* decrements to zero */
+ struct x86_reg temp_EBP;
+ struct x86_reg stack_ESP;
+};
+
+struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
+void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx );
+
+void aos_adopt_xmm_reg( struct aos_compilation *cp,
+ struct x86_reg reg,
+ unsigned file,
+ unsigned idx,
+ unsigned dirty );
+
+void aos_spill_all( struct aos_compilation *cp );
+
+struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx );
+
+boolean aos_init_inputs( struct aos_compilation *cp, boolean linear );
+boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear );
+boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear );
+
+boolean aos_emit_outputs( struct aos_compilation *cp );
+
+
+#define IMM_ONES 0 /* 1, 1,1,1 */
+#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */
+#define IMM_IDENTITY 2 /* 0, 0,0,1 */
+#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
+#define IMM_255 4 /* 255, 255, 255, 255 */
+#define IMM_NEGS 5 /* -1,-1,-1,-1 */
+#define IMM_RSQ 6 /* -.5,1.5,_,_ */
+#define IMM_PSIZE 7 /* not really an immediate - updated each run */
+
+struct x86_reg aos_get_internal( struct aos_compilation *cp,
+ unsigned imm );
+struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
+ unsigned imm );
+
+
+#define AOS_ERROR(cp, msg) \
+do { \
+ if (0) debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \
+ cp->error = 1; \
+} while (0)
+
+
+#define X86_NULL 0
+#define X86_IMMEDIATES 1
+#define X86_CONSTANTS 2
+#define X86_BUFFERS 3
+
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+ unsigned which_reg,
+ unsigned value );
+
+
+typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer);
+
+typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *,
+ unsigned start,
+ unsigned count,
+ void *output_buffer);
+
+
+struct draw_vs_varient_aos_sse {
+ struct draw_vs_varient base;
+ struct draw_context *draw;
+
+ struct aos_buffer *buffer;
+ unsigned nr_vb;
+
+ vaos_run_linear_func gen_run_linear;
+ vaos_run_elts_func gen_run_elts;
+
+
+ struct x86_function func[2];
+};
+
+
+#endif
+
+#endif
+
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
new file mode 100644
index 0000000000..a6eb37d128
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -0,0 +1,462 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+#include "draw_vertex.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+#ifdef PIPE_ARCH_X86
+
+/* Note - don't yet have to worry about interacting with the code in
+ * draw_vs_aos.c as there is no intermingling of generated code...
+ * That may have to change, we'll see.
+ */
+static void emit_load_R32G32B32A32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movups(cp->func, data, src_ptr);
+}
+
+static void emit_load_R32G32B32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+#if 1
+ sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
+ /* data = z ? ? ? */
+ sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
+ /* data = z ? 0 1 */
+ sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
+ /* data = ? 0 z 1 */
+ sse_movlps(cp->func, data, src_ptr);
+ /* data = x y z 1 */
+#else
+ sse_movups(cp->func, data, src_ptr);
+ /* data = x y z ? */
+ sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) );
+ /* data = ? x y z */
+ sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) );
+ /* data = 1 x y z */
+ sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) );
+ /* data = x y z 1 */
+#endif
+}
+
+static void emit_load_R32G32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
+ sse_movlps(cp->func, data, src_ptr);
+}
+
+
+static void emit_load_R32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movss(cp->func, data, src_ptr);
+ sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
+}
+
+
+static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movss(cp->func, data, src_ptr);
+ sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
+ sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
+ sse2_cvtdq2ps(cp->func, data, data);
+ sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
+}
+
+
+
+/* Extended swizzles? Maybe later.
+ */
+static void emit_swizzle( struct aos_compilation *cp,
+ struct x86_reg dest,
+ struct x86_reg src,
+ ubyte shuffle )
+{
+ sse_shufps(cp->func, dest, src, shuffle);
+}
+
+
+
+static boolean get_buffer_ptr( struct aos_compilation *cp,
+ boolean linear,
+ unsigned buf_idx,
+ struct x86_reg elt,
+ struct x86_reg ptr)
+{
+ struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
+ buf_idx * sizeof(struct aos_buffer));
+
+ struct x86_reg buf_stride = x86_make_disp(buf,
+ Offset(struct aos_buffer, stride));
+ if (linear) {
+ struct x86_reg buf_ptr = x86_make_disp(buf,
+ Offset(struct aos_buffer, ptr));
+
+
+ /* Calculate pointer to current attrib:
+ */
+ x86_mov(cp->func, ptr, buf_ptr);
+ x86_mov(cp->func, elt, buf_stride);
+ x86_add(cp->func, elt, ptr);
+ if (buf_idx == 0) sse_prefetchnta(cp->func, x86_make_disp(elt, 192));
+ x86_mov(cp->func, buf_ptr, elt);
+ }
+ else {
+ struct x86_reg buf_base_ptr = x86_make_disp(buf,
+ Offset(struct aos_buffer, base_ptr));
+
+
+ /* Calculate pointer to current attrib:
+ */
+ x86_mov(cp->func, ptr, buf_stride);
+ x86_imul(cp->func, ptr, elt);
+ x86_add(cp->func, ptr, buf_base_ptr);
+ }
+
+ cp->insn_counter++;
+
+ return TRUE;
+}
+
+
+static boolean load_input( struct aos_compilation *cp,
+ unsigned idx,
+ struct x86_reg bufptr )
+{
+ unsigned format = cp->vaos->base.key.element[idx].in.format;
+ unsigned offset = cp->vaos->base.key.element[idx].in.offset;
+ struct x86_reg dataXMM = aos_get_xmm_reg(cp);
+
+ /* Figure out source pointer address:
+ */
+ struct x86_reg src = x86_make_disp(bufptr, offset);
+
+ aos_adopt_xmm_reg( cp,
+ dataXMM,
+ TGSI_FILE_INPUT,
+ idx,
+ TRUE );
+
+ switch (format) {
+ case PIPE_FORMAT_R32_FLOAT:
+ emit_load_R32(cp, dataXMM, src);
+ break;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ emit_load_R32G32(cp, dataXMM, src);
+ break;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ emit_load_R32G32B32(cp, dataXMM, src);
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ emit_load_R32G32B32A32(cp, dataXMM, src);
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
+ emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
+ break;
+ default:
+ AOS_ERROR(cp, "unhandled input format");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean load_inputs( struct aos_compilation *cp,
+ unsigned buffer,
+ struct x86_reg ptr )
+{
+ unsigned i;
+
+ for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
+ if (cp->vaos->base.key.element[i].in.buffer == buffer) {
+
+ if (!load_input( cp, i, ptr ))
+ return FALSE;
+
+ cp->insn_counter++;
+ }
+ }
+
+ return TRUE;
+}
+
+boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
+{
+ unsigned i;
+ for (i = 0; i < cp->vaos->nr_vb; i++) {
+ struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
+ i * sizeof(struct aos_buffer));
+
+ struct x86_reg buf_base_ptr = x86_make_disp(buf,
+ Offset(struct aos_buffer, base_ptr));
+
+ if (cp->vaos->base.key.const_vbuffers & (1<<i)) {
+ struct x86_reg ptr = cp->tmp_EAX;
+
+ x86_mov(cp->func, ptr, buf_base_ptr);
+
+ /* Load all inputs for this constant vertex buffer
+ */
+ load_inputs( cp, i, x86_deref(ptr) );
+
+ /* Then just force them out to aos_machine.input[]
+ */
+ aos_spill_all( cp );
+
+ }
+ else if (linear) {
+
+ struct x86_reg elt = cp->idx_EBX;
+ struct x86_reg ptr = cp->tmp_EAX;
+
+ struct x86_reg buf_stride = x86_make_disp(buf,
+ Offset(struct aos_buffer, stride));
+
+ struct x86_reg buf_ptr = x86_make_disp(buf,
+ Offset(struct aos_buffer, ptr));
+
+
+ /* Calculate pointer to current attrib:
+ */
+ x86_mov(cp->func, ptr, buf_stride);
+ x86_imul(cp->func, ptr, elt);
+ x86_add(cp->func, ptr, buf_base_ptr);
+
+
+ /* In the linear case, keep the buffer pointer instead of the
+ * index number.
+ */
+ if (cp->vaos->nr_vb == 1)
+ x86_mov( cp->func, elt, ptr );
+ else
+ x86_mov( cp->func, buf_ptr, ptr );
+
+ cp->insn_counter++;
+ }
+ }
+
+ return TRUE;
+}
+
+boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
+{
+ unsigned j;
+
+ for (j = 0; j < cp->vaos->nr_vb; j++) {
+ if (cp->vaos->base.key.const_vbuffers & (1<<j)) {
+ /* just retreive pre-transformed input */
+ }
+ else if (linear && cp->vaos->nr_vb == 1) {
+ load_inputs( cp, 0, cp->idx_EBX );
+ }
+ else {
+ struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
+ struct x86_reg ptr = cp->tmp_EAX;
+
+ if (!get_buffer_ptr( cp, linear, j, elt, ptr ))
+ return FALSE;
+
+ if (!load_inputs( cp, j, ptr ))
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
+{
+ if (linear && cp->vaos->nr_vb == 1) {
+ struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
+ (0 * sizeof(struct aos_buffer) +
+ Offset(struct aos_buffer, stride)));
+
+ x86_add(cp->func, cp->idx_EBX, stride);
+ sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192));
+ }
+ else if (linear) {
+ /* Nothing to do */
+ }
+ else {
+ x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
+ }
+
+ return TRUE;
+}
+
+
+
+
+
+
+static void emit_store_R32G32B32A32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movups(cp->func, dst_ptr, dataXMM);
+}
+
+static void emit_store_R32G32B32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movlps(cp->func, dst_ptr, dataXMM);
+ sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
+ sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
+}
+
+static void emit_store_R32G32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movlps(cp->func, dst_ptr, dataXMM);
+}
+
+static void emit_store_R32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movss(cp->func, dst_ptr, dataXMM);
+}
+
+
+
+static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
+ sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
+ sse2_packssdw(cp->func, dataXMM, dataXMM);
+ sse2_packuswb(cp->func, dataXMM, dataXMM);
+ sse_movss(cp->func, dst_ptr, dataXMM);
+}
+
+
+
+
+
+static boolean emit_output( struct aos_compilation *cp,
+ struct x86_reg ptr,
+ struct x86_reg dataXMM,
+ unsigned format )
+{
+ switch (format) {
+ case EMIT_1F:
+ case EMIT_1F_PSIZE:
+ emit_store_R32(cp, ptr, dataXMM);
+ break;
+ case EMIT_2F:
+ emit_store_R32G32(cp, ptr, dataXMM);
+ break;
+ case EMIT_3F:
+ emit_store_R32G32B32(cp, ptr, dataXMM);
+ break;
+ case EMIT_4F:
+ emit_store_R32G32B32A32(cp, ptr, dataXMM);
+ break;
+ case EMIT_4UB:
+ if (1) {
+ emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
+ emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
+ }
+ else {
+ emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
+ }
+ break;
+ default:
+ AOS_ERROR(cp, "unhandled output format");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+
+boolean aos_emit_outputs( struct aos_compilation *cp )
+{
+ unsigned i;
+
+ for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
+ unsigned format = cp->vaos->base.key.element[i].out.format;
+ unsigned offset = cp->vaos->base.key.element[i].out.offset;
+ unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
+
+ struct x86_reg data;
+
+ if (format == EMIT_1F_PSIZE) {
+ data = aos_get_internal_xmm( cp, IMM_PSIZE );
+ }
+ else {
+ data = aos_get_shader_reg( cp,
+ TGSI_FILE_OUTPUT,
+ vs_output );
+ }
+
+ if (data.file != file_XMM) {
+ struct x86_reg tmp = aos_get_xmm_reg( cp );
+ sse_movaps(cp->func, tmp, data);
+ data = tmp;
+ }
+
+ if (!emit_output( cp,
+ x86_make_disp( cp->outbuf_ECX, offset ),
+ data,
+ format ))
+ return FALSE;
+
+ aos_release_xmm_reg( cp, data.idx );
+
+ cp->insn_counter++;
+ }
+
+ return TRUE;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
new file mode 100644
index 0000000000..3240e3745d
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
@@ -0,0 +1,324 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_config.h"
+
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+#include "draw_vertex.h"
+
+#ifdef PIPE_ARCH_X86
+
+#include "rtasm/rtasm_x86sse.h"
+
+
+#define X87_CW_EXCEPTION_INV_OP (1<<0)
+#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
+#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
+#define X87_CW_EXCEPTION_OVERFLOW (1<<3)
+#define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
+#define X87_CW_EXCEPTION_PRECISION (1<<5)
+#define X87_CW_PRECISION_SINGLE (0<<8)
+#define X87_CW_PRECISION_RESERVED (1<<8)
+#define X87_CW_PRECISION_DOUBLE (2<<8)
+#define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
+#define X87_CW_PRECISION_MASK (3<<8)
+#define X87_CW_ROUND_NEAREST (0<<10)
+#define X87_CW_ROUND_DOWN (1<<10)
+#define X87_CW_ROUND_UP (2<<10)
+#define X87_CW_ROUND_ZERO (3<<10)
+#define X87_CW_ROUND_MASK (3<<10)
+#define X87_CW_INFINITY (1<<12)
+
+
+void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count )
+{
+ if (in[0] > 0)
+ {
+ if (in[1] <= 0.0)
+ {
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = 0.0F;
+ result[3] = 1.0F;
+ }
+ else
+ {
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = powf(in[1], exponent);
+ result[3] = 1.0;
+ }
+ }
+ else
+ {
+ result[0] = 1.0F;
+ result[1] = 0.0;
+ result[2] = 0.0;
+ result[3] = 1.0F;
+ }
+}
+
+
+static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count )
+{
+ if (in[0] > 0)
+ {
+ if (in[1] <= 0.0)
+ {
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = 0.0F;
+ result[3] = 1.0F;
+ return;
+ }
+
+ if (machine->lit_info[count].shine_tab->exponent != in[3]) {
+ machine->lit_info[count].func = aos_do_lit;
+ goto no_luck;
+ }
+
+ if (in[1] <= 1.0)
+ {
+ const float *tab = machine->lit_info[count].shine_tab->values;
+ float f = in[1] * 256;
+ int k = (int)f;
+ float frac = f - (float)k;
+
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
+ result[3] = 1.0;
+ return;
+ }
+
+ no_luck:
+ {
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = powf(in[1], exponent);
+ result[3] = 1.0;
+ }
+ }
+ else
+ {
+ result[0] = 1.0F;
+ result[1] = 0.0;
+ result[2] = 0.0;
+ result[3] = 1.0F;
+ }
+}
+
+
+static void do_populate_lut( struct shine_tab *tab,
+ float unclamped_exponent )
+{
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
+ unsigned i;
+
+ tab->exponent = unclamped_exponent; /* for later comparison */
+
+ tab->values[0] = 0;
+ if (exponent == 0) {
+ for (i = 1; i < 258; i++) {
+ tab->values[i] = 1.0;
+ }
+ }
+ else {
+ for (i = 1; i < 258; i++) {
+ tab->values[i] = powf((float)i * epsilon, exponent);
+ }
+ }
+}
+
+
+
+
+static void PIPE_CDECL populate_lut( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count )
+{
+ unsigned i, tab;
+
+ /* Search for an existing table for this value. Note that without
+ * static analysis we don't really know if in[3] will be constant,
+ * but it usually is...
+ */
+ for (tab = 0; tab < 4; tab++) {
+ if (machine->shine_tab[tab].exponent == in[3]) {
+ goto found;
+ }
+ }
+
+ for (tab = 0, i = 1; i < 4; i++) {
+ if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
+ tab = i;
+ }
+
+ if (machine->shine_tab[tab].last_used == machine->now) {
+ /* No unused tables (this is not a ffvertex program...). Just
+ * call pow each time:
+ */
+ machine->lit_info[count].func = aos_do_lit;
+ machine->lit_info[count].func( machine, result, in, count );
+ return;
+ }
+ else {
+ do_populate_lut( &machine->shine_tab[tab], in[3] );
+ }
+
+ found:
+ machine->shine_tab[tab].last_used = machine->now;
+ machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
+ machine->lit_info[count].func = do_lit_lut;
+ machine->lit_info[count].func( machine, result, in, count );
+}
+
+
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+ const float (*constants)[4] )
+{
+ machine->constants = constants;
+
+ {
+ unsigned i;
+ for (i = 0; i < MAX_LIT_INFO; i++) {
+ machine->lit_info[i].func = populate_lut;
+ machine->now++;
+ }
+ }
+}
+
+
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+ const struct pipe_viewport_state *viewport )
+{
+ memcpy(machine->scale, viewport->scale, 4 * sizeof(float));
+ memcpy(machine->translate, viewport->translate, 4 * sizeof(float));
+}
+
+
+
+void draw_vs_aos_machine_destroy( struct aos_machine *machine )
+{
+ align_free(machine);
+}
+
+struct aos_machine *draw_vs_aos_machine( void )
+{
+ struct aos_machine *machine;
+ unsigned i;
+ float inv = 1.0f/255.0f;
+ float f255 = 255.0f;
+
+ machine = align_malloc(sizeof(struct aos_machine), 16);
+ if (!machine)
+ return NULL;
+
+ memset(machine, 0, sizeof(*machine));
+
+ ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
+ *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
+
+ ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
+ ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
+ ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
+ ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
+ ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
+ ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
+
+
+ machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
+ X87_CW_EXCEPTION_DENORM_OP |
+ X87_CW_EXCEPTION_ZERO_DIVIDE |
+ X87_CW_EXCEPTION_OVERFLOW |
+ X87_CW_EXCEPTION_UNDERFLOW |
+ X87_CW_EXCEPTION_PRECISION |
+ (1<<6) |
+ X87_CW_ROUND_NEAREST |
+ X87_CW_PRECISION_DOUBLE_EXT);
+
+ assert(machine->fpu_rnd_nearest == 0x37f);
+
+ machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
+ X87_CW_EXCEPTION_DENORM_OP |
+ X87_CW_EXCEPTION_ZERO_DIVIDE |
+ X87_CW_EXCEPTION_OVERFLOW |
+ X87_CW_EXCEPTION_UNDERFLOW |
+ X87_CW_EXCEPTION_PRECISION |
+ (1<<6) |
+ X87_CW_ROUND_DOWN |
+ X87_CW_PRECISION_DOUBLE_EXT);
+
+ for (i = 0; i < MAX_SHINE_TAB; i++)
+ do_populate_lut( &machine->shine_tab[i], 1.0f );
+
+ return machine;
+}
+
+#else
+
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+ const struct pipe_viewport_state *viewport )
+{
+}
+
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+ const float (*constants)[4] )
+{
+}
+
+void draw_vs_aos_machine_destroy( struct aos_machine *machine )
+{
+}
+
+struct aos_machine *draw_vs_aos_machine( void )
+{
+ return NULL;
+}
+#endif
+
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
new file mode 100644
index 0000000000..41cc802613
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -0,0 +1,208 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+#include "draw_vs.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_exec.h"
+
+
+struct exec_vertex_shader {
+ struct draw_vertex_shader base;
+ struct tgsi_exec_machine *machine;
+};
+
+static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs )
+{
+ return (struct exec_vertex_shader *)vs;
+}
+
+
+/* Not required for run_linear.
+ */
+static void
+vs_exec_prepare( struct draw_vertex_shader *shader,
+ struct draw_context *draw )
+{
+ struct exec_vertex_shader *evs = exec_vertex_shader(shader);
+
+ /* Specify the vertex program to interpret/execute.
+ * Avoid rebinding when possible.
+ */
+ if (evs->machine->Tokens != shader->state.tokens) {
+ tgsi_exec_machine_bind_shader(evs->machine,
+ shader->state.tokens,
+ draw->vs.num_samplers,
+ draw->vs.samplers);
+ }
+}
+
+
+
+
+/* Simplified vertex shader interface for the pt paths. Given the
+ * complexity of code-generating all the above operations together,
+ * it's time to try doing all the other stuff separately.
+ */
+static void
+vs_exec_run_linear( struct draw_vertex_shader *shader,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
+{
+ struct exec_vertex_shader *evs = exec_vertex_shader(shader);
+ struct tgsi_exec_machine *machine = evs->machine;
+ unsigned int i, j;
+ unsigned slot;
+
+ machine->Consts = constants;
+
+ for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
+ unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+
+ /* Swizzle inputs.
+ */
+ for (j = 0; j < max_vertices; j++) {
+#if 0
+ debug_printf("%d) Input vert:\n", i + j);
+ for (slot = 0; slot < shader->info.num_inputs; slot++) {
+ debug_printf("\t%d: %f %f %f %f\n", slot,
+ input[slot][0],
+ input[slot][1],
+ input[slot][2],
+ input[slot][3]);
+ }
+#endif
+
+ for (slot = 0; slot < shader->info.num_inputs; slot++) {
+#if 0
+ assert(!util_is_inf_or_nan(input[slot][0]));
+ assert(!util_is_inf_or_nan(input[slot][1]));
+ assert(!util_is_inf_or_nan(input[slot][2]));
+ assert(!util_is_inf_or_nan(input[slot][3]));
+#endif
+ machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
+ machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
+ machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
+ machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
+ }
+
+ input = (const float (*)[4])((const char *)input + input_stride);
+ }
+
+ tgsi_set_exec_mask(machine,
+ 1,
+ max_vertices > 1,
+ max_vertices > 2,
+ max_vertices > 3);
+
+ /* run interpreter */
+ tgsi_exec_machine_run( machine );
+
+ /* Unswizzle all output results.
+ */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < shader->info.num_outputs; slot++) {
+ output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+
+ }
+
+#if 0
+ debug_printf("%d) Post xform vert:\n", i + j);
+ for (slot = 0; slot < shader->info.num_outputs; slot++) {
+ debug_printf("\t%d: %f %f %f %f\n", slot,
+ output[slot][0],
+ output[slot][1],
+ output[slot][2],
+ output[slot][3]);
+ assert(!util_is_inf_or_nan(output[slot][0]));
+ }
+#endif
+
+ output = (float (*)[4])((char *)output + output_stride);
+ }
+
+ }
+}
+
+
+
+
+static void
+vs_exec_delete( struct draw_vertex_shader *dvs )
+{
+ FREE((void*) dvs->state.tokens);
+ FREE( dvs );
+}
+
+
+struct draw_vertex_shader *
+draw_create_vs_exec(struct draw_context *draw,
+ const struct pipe_shader_state *state)
+{
+ struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader );
+
+ if (vs == NULL)
+ return NULL;
+
+ /* we make a private copy of the tokens */
+ vs->base.state.tokens = tgsi_dup_tokens(state->tokens);
+ if (!vs->base.state.tokens) {
+ FREE(vs);
+ return NULL;
+ }
+
+ tgsi_scan_shader(state->tokens, &vs->base.info);
+
+ vs->base.draw = draw;
+ vs->base.prepare = vs_exec_prepare;
+ vs->base.run_linear = vs_exec_run_linear;
+ vs->base.delete = vs_exec_delete;
+ vs->base.create_varient = draw_vs_varient_generic;
+ vs->machine = draw->vs.machine;
+
+ return &vs->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
new file mode 100644
index 0000000000..b3535c0e48
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -0,0 +1,161 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw_private.h"
+#include "draw_context.h"
+#include "draw_vs.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#ifdef MESA_LLVM
+
+#include "gallivm/gallivm.h"
+
+struct draw_llvm_vertex_shader {
+ struct draw_vertex_shader base;
+ struct gallivm_prog *llvm_prog;
+ struct tgsi_exec_machine *machine;
+};
+
+
+static void
+vs_llvm_prepare( struct draw_vertex_shader *base,
+ struct draw_context *draw )
+{
+}
+
+
+
+
+static void
+vs_llvm_run_linear( struct draw_vertex_shader *base,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
+{
+ struct draw_llvm_vertex_shader *shader =
+ (struct draw_llvm_vertex_shader *)base;
+
+ gallivm_cpu_vs_exec(shader->llvm_prog, shader->machine,
+ input, base->info.num_inputs, output, base->info.num_outputs,
+ constants, count, input_stride, output_stride);
+}
+
+
+
+static void
+vs_llvm_delete( struct draw_vertex_shader *base )
+{
+ struct draw_llvm_vertex_shader *shader =
+ (struct draw_llvm_vertex_shader *)base;
+
+ /* Do something to free compiled shader:
+ */
+
+ FREE( (void*) shader->base.state.tokens );
+ FREE( shader );
+}
+
+
+
+
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *templ)
+{
+ struct draw_llvm_vertex_shader *vs;
+
+ vs = CALLOC_STRUCT( draw_llvm_vertex_shader );
+ if (vs == NULL)
+ return NULL;
+
+ /* we make a private copy of the tokens */
+ vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
+ if (!vs->base.state.tokens) {
+ FREE(vs);
+ return NULL;
+ }
+
+ tgsi_scan_shader(vs->base.state.tokens, &vs->base.info);
+
+ vs->base.draw = draw;
+ vs->base.prepare = vs_llvm_prepare;
+ vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.run_linear = vs_llvm_run_linear;
+ vs->base.delete = vs_llvm_delete;
+ vs->machine = draw->vs.machine;
+
+ {
+ struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
+ gallivm_ir_set_layout(ir, GALLIVM_SOA);
+ gallivm_ir_set_components(ir, 4);
+ gallivm_ir_fill_from_tgsi(ir, vs->base.state.tokens);
+ vs->llvm_prog = gallivm_ir_compile(ir);
+ gallivm_ir_delete(ir);
+ }
+
+ draw->vs.engine = gallivm_global_cpu_engine();
+
+ /* XXX: Why are there two versions of this? Shouldn't creating the
+ * engine be a separate operation to compiling a shader?
+ */
+ if (!draw->vs.engine) {
+ draw->vs.engine = gallivm_cpu_engine_create(vs->llvm_prog);
+ }
+ else {
+ gallivm_cpu_jit_compile(draw->vs.engine, vs->llvm_prog);
+ }
+
+ return &vs->base;
+}
+
+
+
+
+
+#else
+
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *shader)
+{
+ return NULL;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
new file mode 100644
index 0000000000..ad184bd696
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -0,0 +1,245 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_config.h"
+
+#include "draw_vs.h"
+
+#if defined(PIPE_ARCH_PPC)
+
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+
+#include "rtasm/rtasm_cpu.h"
+#include "rtasm/rtasm_ppc.h"
+#include "tgsi/tgsi_ppc.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
+
+
+
+typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4],
+ float (*outputs)[4][4],
+ float (*temps)[4][4],
+ float (*immeds)[4],
+ float (*consts)[4],
+ const float *builtins);
+
+
+struct draw_ppc_vertex_shader {
+ struct draw_vertex_shader base;
+ struct ppc_function ppc_program;
+
+ codegen_function func;
+};
+
+
+static void
+vs_ppc_prepare( struct draw_vertex_shader *base,
+ struct draw_context *draw )
+{
+ /* nothing */
+}
+
+
+/**
+ * Simplified vertex shader interface for the pt paths. Given the
+ * complexity of code-generating all the above operations together,
+ * it's time to try doing all the other stuff separately.
+ */
+static void
+vs_ppc_run_linear( struct draw_vertex_shader *base,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
+{
+ struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base;
+ unsigned int i;
+
+#define MAX_VERTICES 4
+
+ /* loop over verts */
+ for (i = 0; i < count; i += MAX_VERTICES) {
+ const uint max_vertices = MIN2(MAX_VERTICES, count - i);
+ float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB;
+ float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB;
+ float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB;
+ uint attr;
+
+ /* convert (up to) four input verts to SoA format */
+ for (attr = 0; attr < base->info.num_inputs; attr++) {
+ const float *vIn = (const float *) input;
+ uint vert;
+ for (vert = 0; vert < max_vertices; vert++) {
+#if 0
+ if (attr==0)
+ printf("Input v%d a%d: %f %f %f %f\n",
+ vert, attr, vIn[0], vIn[1], vIn[2], vIn[3]);
+#endif
+ inputs_soa[attr][0][vert] = vIn[attr * 4 + 0];
+ inputs_soa[attr][1][vert] = vIn[attr * 4 + 1];
+ inputs_soa[attr][2][vert] = vIn[attr * 4 + 2];
+ inputs_soa[attr][3][vert] = vIn[attr * 4 + 3];
+ vIn += input_stride / 4;
+ }
+ }
+
+ /* run compiled shader
+ */
+ shader->func(inputs_soa, outputs_soa, temps_soa,
+ (float (*)[4]) shader->base.immediates,
+ (float (*)[4]) constants,
+ ppc_builtin_constants);
+
+ /* convert (up to) four output verts from SoA back to AoS format */
+ for (attr = 0; attr < base->info.num_outputs; attr++) {
+ float *vOut = (float *) output;
+ uint vert;
+ for (vert = 0; vert < max_vertices; vert++) {
+ vOut[attr * 4 + 0] = outputs_soa[attr][0][vert];
+ vOut[attr * 4 + 1] = outputs_soa[attr][1][vert];
+ vOut[attr * 4 + 2] = outputs_soa[attr][2][vert];
+ vOut[attr * 4 + 3] = outputs_soa[attr][3][vert];
+#if 0
+ if (attr==0)
+ printf("Output v%d a%d: %f %f %f %f\n",
+ vert, attr, vOut[0], vOut[1], vOut[2], vOut[3]);
+#endif
+ vOut += output_stride / 4;
+ }
+ }
+
+ /* advance to next group of four input/output verts */
+ input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
+ output = (float (*)[4])((char *)output + output_stride * max_vertices);
+ }
+}
+
+
+static void
+vs_ppc_delete( struct draw_vertex_shader *base )
+{
+ struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base;
+
+ ppc_release_func( &shader->ppc_program );
+
+ align_free( (void *) shader->base.immediates );
+
+ FREE( (void*) shader->base.state.tokens );
+ FREE( shader );
+}
+
+
+struct draw_vertex_shader *
+draw_create_vs_ppc(struct draw_context *draw,
+ const struct pipe_shader_state *templ)
+{
+ struct draw_ppc_vertex_shader *vs;
+
+ vs = CALLOC_STRUCT( draw_ppc_vertex_shader );
+ if (vs == NULL)
+ return NULL;
+
+ /* we make a private copy of the tokens */
+ vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
+ if (!vs->base.state.tokens)
+ goto fail;
+
+ tgsi_scan_shader(templ->tokens, &vs->base.info);
+
+ vs->base.draw = draw;
+#if 0
+ if (1)
+ vs->base.create_varient = draw_vs_varient_aos_ppc;
+ else
+#endif
+ vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.prepare = vs_ppc_prepare;
+ vs->base.run_linear = vs_ppc_run_linear;
+ vs->base.delete = vs_ppc_delete;
+
+ vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
+ sizeof(float), 16);
+
+ ppc_init_func( &vs->ppc_program );
+
+#if 0
+ ppc_print_code(&vs->ppc_program, TRUE);
+ ppc_indent(&vs->ppc_program, 8);
+#endif
+
+ if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens,
+ &vs->ppc_program,
+ (float (*)[4]) vs->base.immediates,
+ TRUE ))
+ goto fail;
+
+ vs->func = (codegen_function) ppc_get_func( &vs->ppc_program );
+ if (!vs->func) {
+ goto fail;
+ }
+
+ return &vs->base;
+
+fail:
+ /*
+ debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n");
+ */
+
+ ppc_release_func( &vs->ppc_program );
+
+ FREE(vs);
+ return NULL;
+}
+
+
+
+#else /* PIPE_ARCH_PPC */
+
+
+struct draw_vertex_shader *
+draw_create_vs_ppc( struct draw_context *draw,
+ const struct pipe_shader_state *templ )
+{
+ return (void *) 0;
+}
+
+
+#endif /* PIPE_ARCH_PPC */
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
new file mode 100644
index 0000000000..702051387a
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -0,0 +1,217 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_config.h"
+
+#include "draw_vs.h"
+
+#if defined(PIPE_ARCH_X86)
+
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+
+#include "rtasm/rtasm_cpu.h"
+#include "rtasm/rtasm_x86sse.h"
+#include "tgsi/tgsi_sse2.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
+
+#define SSE_MAX_VERTICES 4
+
+
+struct draw_sse_vertex_shader {
+ struct draw_vertex_shader base;
+ struct x86_function sse2_program;
+
+ tgsi_sse2_vs_func func;
+
+ struct tgsi_exec_machine *machine;
+};
+
+
+static void
+vs_sse_prepare( struct draw_vertex_shader *base,
+ struct draw_context *draw )
+{
+ struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
+ struct tgsi_exec_machine *machine = shader->machine;
+
+ machine->Samplers = draw->vs.samplers;
+}
+
+
+
+/* Simplified vertex shader interface for the pt paths. Given the
+ * complexity of code-generating all the above operations together,
+ * it's time to try doing all the other stuff separately.
+ */
+static void
+vs_sse_run_linear( struct draw_vertex_shader *base,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
+{
+ struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
+ struct tgsi_exec_machine *machine = shader->machine;
+ unsigned int i;
+
+ /* By default, execute all channels. XXX move this inside the loop
+ * below when we support shader conditionals/loops.
+ */
+ tgsi_set_exec_mask(machine, 1, 1, 1, 1);
+
+ for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
+ unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+
+ if (max_vertices < 4) {
+ /* disable the unused execution channels */
+ tgsi_set_exec_mask(machine,
+ 1,
+ max_vertices > 1,
+ max_vertices > 2,
+ 0);
+ }
+
+ /* run compiled shader
+ */
+ shader->func(machine,
+ constants,
+ shader->base.immediates,
+ input,
+ base->info.num_inputs,
+ input_stride,
+ output,
+ base->info.num_outputs,
+ output_stride );
+
+ input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
+ output = (float (*)[4])((char *)output + output_stride * max_vertices);
+ }
+}
+
+
+
+
+static void
+vs_sse_delete( struct draw_vertex_shader *base )
+{
+ struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
+
+ x86_release_func( &shader->sse2_program );
+
+ align_free( (void *) shader->base.immediates );
+
+ FREE( (void*) shader->base.state.tokens );
+ FREE( shader );
+}
+
+
+struct draw_vertex_shader *
+draw_create_vs_sse(struct draw_context *draw,
+ const struct pipe_shader_state *templ)
+{
+ struct draw_sse_vertex_shader *vs;
+
+ if (!rtasm_cpu_has_sse2())
+ return NULL;
+
+ vs = CALLOC_STRUCT( draw_sse_vertex_shader );
+ if (vs == NULL)
+ return NULL;
+
+ /* we make a private copy of the tokens */
+ vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
+ if (!vs->base.state.tokens)
+ goto fail;
+
+ tgsi_scan_shader(templ->tokens, &vs->base.info);
+
+ vs->base.draw = draw;
+ if (1)
+ vs->base.create_varient = draw_vs_varient_aos_sse;
+ else
+ vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.prepare = vs_sse_prepare;
+ vs->base.run_linear = vs_sse_run_linear;
+ vs->base.delete = vs_sse_delete;
+
+ vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
+ sizeof(float), 16);
+
+ vs->machine = draw->vs.machine;
+
+ x86_init_func( &vs->sse2_program );
+
+ if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
+ &vs->sse2_program,
+ (float (*)[4])vs->base.immediates,
+ TRUE ))
+ goto fail;
+
+ vs->func = (tgsi_sse2_vs_func) x86_get_func( &vs->sse2_program );
+ if (!vs->func) {
+ goto fail;
+ }
+
+ return &vs->base;
+
+fail:
+ debug_error("tgsi_emit_sse2() failed, falling back to interpreter\n");
+
+ x86_release_func( &vs->sse2_program );
+
+ FREE(vs);
+ return NULL;
+}
+
+
+
+#else
+
+struct draw_vertex_shader *
+draw_create_vs_sse( struct draw_context *draw,
+ const struct pipe_shader_state *templ )
+{
+ return (void *) 0;
+}
+
+
+#endif
+
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
new file mode 100644
index 0000000000..7ee567d478
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -0,0 +1,322 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_vs.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+/* A first pass at incorporating vertex fetch/emit functionality into
+ */
+struct draw_vs_varient_generic {
+ struct draw_vs_varient base;
+
+ struct draw_vertex_shader *shader;
+ struct draw_context *draw;
+
+ /* Basic plan is to run these two translate functions before/after
+ * the vertex shader's existing run_linear() routine to simulate
+ * the inclusion of this functionality into the shader...
+ *
+ * Next will look at actually including it.
+ */
+ struct translate *fetch;
+ struct translate *emit;
+
+ unsigned temp_vertex_stride;
+};
+
+
+
+
+
+static void vsvg_set_buffer( struct draw_vs_varient *varient,
+ unsigned buffer,
+ const void *ptr,
+ unsigned stride )
+{
+ struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+
+ vsvg->fetch->set_buffer(vsvg->fetch,
+ buffer,
+ ptr,
+ stride);
+}
+
+
+/* Mainly for debug at this stage:
+ */
+static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
+ unsigned count,
+ void *output_buffer )
+{
+ char *ptr = (char *)output_buffer;
+ const float *scale = vsvg->base.vs->draw->viewport.scale;
+ const float *trans = vsvg->base.vs->draw->viewport.translate;
+ unsigned stride = vsvg->temp_vertex_stride;
+ unsigned j;
+
+ ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
+
+ for (j = 0; j < count; j++, ptr += stride) {
+ float *data = (float *)ptr;
+ float w = 1.0f / data[3];
+
+ data[0] = data[0] * w * scale[0] + trans[0];
+ data[1] = data[1] * w * scale[1] + trans[1];
+ data[2] = data[2] * w * scale[2] + trans[2];
+ data[3] = w;
+ }
+}
+
+static void do_viewport( struct draw_vs_varient_generic *vsvg,
+ unsigned count,
+ void *output_buffer )
+{
+ char *ptr = (char *)output_buffer;
+ const float *scale = vsvg->base.vs->draw->viewport.scale;
+ const float *trans = vsvg->base.vs->draw->viewport.translate;
+ unsigned stride = vsvg->temp_vertex_stride;
+ unsigned j;
+
+ ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
+
+ for (j = 0; j < count; j++, ptr += stride) {
+ float *data = (float *)ptr;
+
+ data[0] = data[0] * scale[0] + trans[0];
+ data[1] = data[1] * scale[1] + trans[1];
+ data[2] = data[2] * scale[2] + trans[2];
+ }
+}
+
+
+static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer)
+{
+ struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+ unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
+ void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
+
+ if (0) debug_printf("%s %d \n", __FUNCTION__, count);
+
+ /* Want to do this in small batches for cache locality?
+ */
+
+ vsvg->fetch->run_elts( vsvg->fetch,
+ elts,
+ count,
+ temp_buffer );
+
+ vsvg->base.vs->run_linear( vsvg->base.vs,
+ temp_buffer,
+ temp_buffer,
+ (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
+ count,
+ temp_vertex_stride,
+ temp_vertex_stride);
+
+
+ if (vsvg->base.key.clip) {
+ /* not really handling clipping, just do the rhw so we can
+ * see the results...
+ */
+ do_rhw_viewport( vsvg,
+ count,
+ temp_buffer );
+ }
+ else if (vsvg->base.key.viewport) {
+ do_viewport( vsvg,
+ count,
+ temp_buffer );
+ }
+
+
+ vsvg->emit->set_buffer( vsvg->emit,
+ 0,
+ temp_buffer,
+ temp_vertex_stride );
+
+ vsvg->emit->set_buffer( vsvg->emit,
+ 1,
+ &vsvg->draw->rasterizer->point_size,
+ 0);
+
+ vsvg->emit->run( vsvg->emit,
+ 0, count,
+ output_buffer );
+
+ FREE(temp_buffer);
+}
+
+
+static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
+ unsigned start,
+ unsigned count,
+ void *output_buffer )
+{
+ struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+ unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
+ void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
+
+ if (0) debug_printf("%s %d %d (sz %d, %d)\n", __FUNCTION__, start, count,
+ vsvg->base.key.output_stride,
+ temp_vertex_stride);
+
+ vsvg->fetch->run( vsvg->fetch,
+ start,
+ count,
+ temp_buffer );
+
+ vsvg->base.vs->run_linear( vsvg->base.vs,
+ temp_buffer,
+ temp_buffer,
+ (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
+ count,
+ temp_vertex_stride,
+ temp_vertex_stride);
+
+ if (vsvg->base.key.clip) {
+ /* not really handling clipping, just do the rhw so we can
+ * see the results...
+ */
+ do_rhw_viewport( vsvg,
+ count,
+ temp_buffer );
+ }
+ else if (vsvg->base.key.viewport) {
+ do_viewport( vsvg,
+ count,
+ temp_buffer );
+ }
+
+ vsvg->emit->set_buffer( vsvg->emit,
+ 0,
+ temp_buffer,
+ temp_vertex_stride );
+
+ vsvg->emit->set_buffer( vsvg->emit,
+ 1,
+ &vsvg->draw->rasterizer->point_size,
+ 0);
+
+ vsvg->emit->run( vsvg->emit,
+ 0, count,
+ output_buffer );
+
+ FREE(temp_buffer);
+}
+
+
+
+
+
+static void vsvg_destroy( struct draw_vs_varient *varient )
+{
+ FREE(varient);
+}
+
+
+struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key )
+{
+ unsigned i;
+ struct translate_key fetch, emit;
+
+ struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic );
+ if (vsvg == NULL)
+ return NULL;
+
+ vsvg->base.key = *key;
+ vsvg->base.vs = vs;
+ vsvg->base.set_buffer = vsvg_set_buffer;
+ vsvg->base.run_elts = vsvg_run_elts;
+ vsvg->base.run_linear = vsvg_run_linear;
+ vsvg->base.destroy = vsvg_destroy;
+
+ vsvg->draw = vs->draw;
+
+ vsvg->temp_vertex_stride = MAX2(key->nr_inputs,
+ vsvg->base.vs->info.num_outputs) * 4 * sizeof(float);
+
+ /* Build free-standing fetch and emit functions:
+ */
+ fetch.nr_elements = key->nr_inputs;
+ fetch.output_stride = vsvg->temp_vertex_stride;
+ for (i = 0; i < key->nr_inputs; i++) {
+ fetch.element[i].input_format = key->element[i].in.format;
+ fetch.element[i].input_buffer = key->element[i].in.buffer;
+ fetch.element[i].input_offset = key->element[i].in.offset;
+ fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ fetch.element[i].output_offset = i * 4 * sizeof(float);
+ assert(fetch.element[i].output_offset < fetch.output_stride);
+ }
+
+
+ emit.nr_elements = key->nr_outputs;
+ emit.output_stride = key->output_stride;
+ for (i = 0; i < key->nr_outputs; i++) {
+ if (key->element[i].out.format != EMIT_1F_PSIZE)
+ {
+ emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit.element[i].input_buffer = 0;
+ emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
+ emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
+ emit.element[i].output_offset = key->element[i].out.offset;
+ assert(emit.element[i].input_offset <= fetch.output_stride);
+ }
+ else {
+ emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
+ emit.element[i].input_buffer = 1;
+ emit.element[i].input_offset = 0;
+ emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
+ emit.element[i].output_offset = key->element[i].out.offset;
+ }
+ }
+
+ vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch );
+ vsvg->emit = draw_vs_get_emit( vs->draw, &emit );
+
+ return &vsvg->base;
+}
+
+
+
+
+