Merge commit 'origin/gallium-0.1' into gallium-0.2

Conflicts: src/gallium/auxiliary/gallivm/instructionssoa.cpp src/gallium/auxiliary/gallivm/soabuiltins.c src/gallium/auxiliary/rtasm/rtasm_x86sse.c src/gallium/auxiliary/rtasm/rtasm_x86sse.h src/mesa/main/texenvprogram.c src/mesa/shader/arbprogparse.c src/mesa/shader/prog_statevars.c src/mesa/state_tracker/st_draw.c src/mesa/vbo/vbo_exec_draw.c
author: Keith Whitwell <keith@tungstengraphics.com> 2008-10-10 15:19:05 +0100
committer: Keith Whitwell <keith@tungstengraphics.com> 2008-10-10 15:23:36 +0100
commit: d7f1cb5b5a134b63227d5746a2dd1f05597c5c2f (patch)
tree: 7446e243da8b50f6cb323b7917bdb94fbd528368
parent: 7ac1fc77661faf0897507fef0437fe69d0ba53ac (diff)
parent: f7556fdd40ed2719beaba271eee4a7551e212ad1 (diff)
40 files changed, 769 insertions, 122 deletions
diff --git a/progs/trivial/Makefile b/progs/trivial/Makefile
index 28c0b12fa4..a4077bd016 100644
--- a/progs/trivial/Makefile
+++ b/progs/trivial/Makefile
@@ -125,6 +125,9 @@ SOURCES = \
 	vp-clip.c \
 	vp-line-clip.c \
 	vp-tri.c \
+	vp-tri-swap.c \
+	vp-tri-imm.c \
+	vp-tri-cb.c \
 	vp-unfilled.c 
 
 PROGS = $(SOURCES:%.c=%)
diff --git a/progs/trivial/vp-tri-cb.c b/progs/trivial/vp-tri-cb.c
new file mode 100644
index 0000000000..f9d0d7f559
--- /dev/null
+++ b/progs/trivial/vp-tri-cb.c
@@ -0,0 +1,107 @@
+/* Test glGenProgramsNV(), glIsProgramNV(), glLoadProgramNV() */
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#define GL_GLEXT_PROTOTYPES
+#include <GL/glut.h>
+
+static void Init( void )
+{
+   GLint errno;
+   GLuint prognum;
+   
+   static const char *prog1 =
+      "!!ARBvp1.0\n"
+      "PARAM Diffuse = state.material.diffuse; \n"
+      "MOV  result.color, Diffuse;\n"
+      "MOV  result.position, vertex.position;\n"
+      "END\n";
+
+   const float Diffuse[4] = { 0.0, 1.0, 0.0, 1.0 };
+   glMaterialfv(GL_FRONT_AND_BACK, GL_DIFFUSE, Diffuse);
+
+
+   glGenProgramsARB(1, &prognum);
+
+   glBindProgramARB(GL_VERTEX_PROGRAM_ARB, prognum);
+   glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
+		      strlen(prog1), (const GLubyte *) prog1);
+
+   assert(glIsProgramARB(prognum));
+   errno = glGetError();
+   printf("glGetError = %d\n", errno);
+   if (errno != GL_NO_ERROR)
+   {
+      GLint errorpos;
+
+      glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errorpos);
+      printf("errorpos: %d\n", errorpos);
+      printf("%s\n", (char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB));
+   }
+}
+
+static void Display( void )
+{
+   glClearColor(0.3, 0.3, 0.3, 1);
+   glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
+
+   glEnable(GL_VERTEX_PROGRAM_NV);
+
+   glBegin(GL_TRIANGLES);
+   glColor3f(0,0,.7); 
+   glVertex3f( 0.9, -0.9, -0.0);
+   glColor3f(.8,0,0); 
+   glVertex3f( 0.9,  0.9, -0.0);
+   glColor3f(0,.9,0); 
+   glVertex3f(-0.9,  0.0, -0.0);
+   glEnd();
+
+
+   glFlush(); 
+}
+
+
+static void Reshape( int width, int height )
+{
+   glViewport( 0, 0, width, height );
+   glMatrixMode( GL_PROJECTION );
+   glLoadIdentity();
+   glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0);
+   glMatrixMode( GL_MODELVIEW );
+   glLoadIdentity();
+   /*glTranslatef( 0.0, 0.0, -15.0 );*/
+}
+
+
+static void Key( unsigned char key, int x, int y )
+{
+   (void) x;
+   (void) y;
+   switch (key) {
+      case 27:
+         exit(0);
+         break;
+   }
+   glutPostRedisplay();
+}
+
+
+
+
+int main( int argc, char *argv[] )
+{
+   glutInit( &argc, argv );
+   glutInitWindowPosition( 0, 0 );
+   glutInitWindowSize( 250, 250 );
+   glutInitDisplayMode( GLUT_DEPTH | GLUT_RGB | GLUT_SINGLE );
+   glutCreateWindow(argv[0]);
+   glutReshapeFunc( Reshape );
+   glutKeyboardFunc( Key );
+   glutDisplayFunc( Display );
+   Init();
+   glutMainLoop();
+   return 0;
+}
diff --git a/progs/trivial/vp-tri-imm.c b/progs/trivial/vp-tri-imm.c
new file mode 100644
index 0000000000..c774573ba8
--- /dev/null
+++ b/progs/trivial/vp-tri-imm.c
@@ -0,0 +1,101 @@
+/* Test glGenProgramsNV(), glIsProgramNV(), glLoadProgramNV() */
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#define GL_GLEXT_PROTOTYPES
+#include <GL/glut.h>
+
+static void Init( void )
+{
+   GLint errno;
+   GLuint prognum;
+   
+   static const char *prog1 =
+      "!!ARBvp1.0\n"
+      "ADD  result.color, vertex.color, {.5}.x;\n"  
+      "MOV  result.position, vertex.position;\n"
+      "END\n";
+
+
+   glGenProgramsARB(1, &prognum);
+
+   glBindProgramARB(GL_VERTEX_PROGRAM_ARB, prognum);
+   glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
+		      strlen(prog1), (const GLubyte *) prog1);
+
+   assert(glIsProgramARB(prognum));
+   errno = glGetError();
+   printf("glGetError = %d\n", errno);
+   if (errno != GL_NO_ERROR)
+   {
+      GLint errorpos;
+
+      glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errorpos);
+      printf("errorpos: %d\n", errorpos);
+      printf("%s\n", (char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB));
+   }
+}
+
+static void Display( void )
+{
+   glClearColor(0.3, 0.3, 0.3, 1);
+   glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
+
+   glEnable(GL_VERTEX_PROGRAM_NV);
+
+   glBegin(GL_TRIANGLES);
+   glColor3f(0,0,0); 
+   glVertex3f( 0.9, -0.9, -0.0);
+   glVertex3f( 0.9,  0.9, -0.0);
+   glVertex3f(-0.9,  0.0, -0.0);
+   glEnd();
+
+
+   glFlush(); 
+}
+
+
+static void Reshape( int width, int height )
+{
+   glViewport( 0, 0, width, height );
+   glMatrixMode( GL_PROJECTION );
+   glLoadIdentity();
+   glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0);
+   glMatrixMode( GL_MODELVIEW );
+   glLoadIdentity();
+   /*glTranslatef( 0.0, 0.0, -15.0 );*/
+}
+
+
+static void Key( unsigned char key, int x, int y )
+{
+   (void) x;
+   (void) y;
+   switch (key) {
+      case 27:
+         exit(0);
+         break;
+   }
+   glutPostRedisplay();
+}
+
+
+
+
+int main( int argc, char *argv[] )
+{
+   glutInit( &argc, argv );
+   glutInitWindowPosition( 0, 0 );
+   glutInitWindowSize( 250, 250 );
+   glutInitDisplayMode( GLUT_DEPTH | GLUT_RGB | GLUT_SINGLE );
+   glutCreateWindow(argv[0]);
+   glutReshapeFunc( Reshape );
+   glutKeyboardFunc( Key );
+   glutDisplayFunc( Display );
+   Init();
+   glutMainLoop();
+   return 0;
+}
diff --git a/progs/trivial/vp-tri-swap.c b/progs/trivial/vp-tri-swap.c
new file mode 100644
index 0000000000..e9ca1a0378
--- /dev/null
+++ b/progs/trivial/vp-tri-swap.c
@@ -0,0 +1,103 @@
+/* Test glGenProgramsNV(), glIsProgramNV(), glLoadProgramNV() */
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#define GL_GLEXT_PROTOTYPES
+#include <GL/glut.h>
+
+static void Init( void )
+{
+   GLint errno;
+   GLuint prognum;
+   
+   static const char *prog1 =
+      "!!ARBvp1.0\n"
+      "MOV  result.position, vertex.color;\n"
+      "MOV  result.color, vertex.position;\n"
+      "END\n";
+
+
+   glGenProgramsARB(1, &prognum);
+
+   glBindProgramARB(GL_VERTEX_PROGRAM_ARB, prognum);
+   glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
+		      strlen(prog1), (const GLubyte *) prog1);
+
+   assert(glIsProgramARB(prognum));
+   errno = glGetError();
+   printf("glGetError = %d\n", errno);
+   if (errno != GL_NO_ERROR)
+   {
+      GLint errorpos;
+
+      glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errorpos);
+      printf("errorpos: %d\n", errorpos);
+      printf("%s\n", (char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB));
+   }
+}
+
+static void Display( void )
+{
+   glClearColor(0.3, 0.3, 0.3, 1);
+   glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
+
+   glEnable(GL_VERTEX_PROGRAM_NV);
+
+   glBegin(GL_TRIANGLES);
+   glColor3f(0,0,.7); 
+   glVertex3f( 0.9, -0.9, -0.0);
+   glColor3f(.8,0,0); 
+   glVertex3f( 0.9,  0.9, -0.0);
+   glColor3f(0,.9,0); 
+   glVertex3f(-0.9,  0.0, -0.0);
+   glEnd();
+
+
+   glFlush(); 
+}
+
+
+static void Reshape( int width, int height )
+{
+   glViewport( 0, 0, width, height );
+   glMatrixMode( GL_PROJECTION );
+   glLoadIdentity();
+   glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0);
+   glMatrixMode( GL_MODELVIEW );
+   glLoadIdentity();
+   /*glTranslatef( 0.0, 0.0, -15.0 );*/
+}
+
+
+static void Key( unsigned char key, int x, int y )
+{
+   (void) x;
+   (void) y;
+   switch (key) {
+      case 27:
+         exit(0);
+         break;
+   }
+   glutPostRedisplay();
+}
+
+
+
+
+int main( int argc, char *argv[] )
+{
+   glutInit( &argc, argv );
+   glutInitWindowPosition( 0, 0 );
+   glutInitWindowSize( 250, 250 );
+   glutInitDisplayMode( GLUT_DEPTH | GLUT_RGB | GLUT_SINGLE );
+   glutCreateWindow(argv[0]);
+   glutReshapeFunc( Reshape );
+   glutKeyboardFunc( Key );
+   glutDisplayFunc( Display );
+   Init();
+   glutMainLoop();
+   return 0;
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 36751c2621..41a4cba1dd 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -272,6 +272,14 @@ draw_enable_point_sprites(struct draw_context *draw, boolean enable)
 }
 
 
+void
+draw_set_force_passthrough( struct draw_context *draw, boolean enable )
+{
+   draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+   draw->force_passthrough = enable;
+}
+
+
 /**
  * Ask the draw module for the location/slot of the given vertex attribute in
  * a post-transformed vertex.
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 0ab3681b64..3eeb453531 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -160,6 +160,9 @@ void draw_set_render( struct draw_context *draw,
 void draw_set_driver_clipping( struct draw_context *draw,
                                boolean bypass_clipping );
 
+void draw_set_force_passthrough( struct draw_context *draw, 
+                                 boolean enable );
+
 /*******************************************************************************
  * Draw pipeline 
  */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index c0cf4269db..9825e116c3 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -231,9 +231,9 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
       unsigned emit_sz = 0;
       unsigned src_buffer = 0;
       unsigned output_format;
-      unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) );
+      unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) );
 
-      switch (vbuf->vinfo->emit[i]) {
+      switch (vbuf->vinfo->attrib[i].emit) {
       case EMIT_4F:
 	 output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
 	 emit_sz = 4 * sizeof(float);
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 40a72c9dcf..5d531146c5 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -163,12 +163,15 @@ struct draw_context
 
    struct {
       boolean bypass_clipping;
+      boolean bypass_vs;
    } driver;
 
    boolean flushing;         /**< debugging/sanity */
    boolean suspend_flushing; /**< internally set */
    boolean bypass_clipping;  /**< set if either api or driver bypass_clipping true */
 
+   boolean force_passthrough; /**< never clip or shade */
+
    /* pipe state that we need: */
    const struct pipe_rasterizer_state *rasterizer;
    struct pipe_viewport_state viewport;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 669c11c993..87ec6ae20c 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -69,26 +69,26 @@ draw_pt_arrays(struct draw_context *draw,
          return TRUE;
    }
 
-
-   if (!draw->render) {
-      opt |= PT_PIPELINE;
-   }
-
-   if (draw_need_pipeline(draw,
-                          draw->rasterizer,
-                          prim)) {
-      opt |= PT_PIPELINE;
-   }
-
-   if (!draw->bypass_clipping && !draw->pt.test_fse) {
-      opt |= PT_CLIPTEST;
+   if (!draw->force_passthrough) {
+      if (!draw->render) {
+         opt |= PT_PIPELINE;
+      }
+      
+      if (draw_need_pipeline(draw,
+                             draw->rasterizer,
+                             prim)) {
+         opt |= PT_PIPELINE;
+      }
+
+      if (!draw->bypass_clipping && !draw->pt.test_fse) {
+         opt |= PT_CLIPTEST;
+      }
+      
+      if (!draw->rasterizer->bypass_vs) {
+         opt |= PT_SHADE;
+      }
    }
-
-   if (!draw->rasterizer->bypass_vs) {
-      opt |= PT_SHADE;
-   }
-
-
+      
    if (opt == 0) 
       middle = draw->pt.middle.fetch_emit;
    else if (opt == PT_SHADE && !draw->pt.no_fse)
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index d4eca80588..d520b05869 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -84,11 +84,11 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
       unsigned emit_sz = 0;
       unsigned src_buffer = 0;
       unsigned output_format;
-      unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) );
+      unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
 
 
          
-      switch (vinfo->emit[i]) {
+      switch (vinfo->attrib[i].emit) {
       case EMIT_4F:
 	 output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
 	 emit_sz = 4 * sizeof(float);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 5a4db6cfe5..3966ad48ba 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -121,7 +121,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
    memset(&key, 0, sizeof(key));
 
    for (i = 0; i < vinfo->num_attribs; i++) {
-      const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]];
+      const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->attrib[i].src_index];
 
       unsigned emit_sz = 0;
       unsigned input_format = src->src_format;
@@ -129,7 +129,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
       unsigned input_offset = src->src_offset;
       unsigned output_format;
 
-      switch (vinfo->emit[i]) {
+      switch (vinfo->attrib[i].emit) {
       case EMIT_4F:
 	 output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
 	 emit_sz = 4 * sizeof(float);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index a0e08dd10a..f7e6a1a8ee 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -133,7 +133,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
       for (i = 0; i < vinfo->num_attribs; i++) {
          unsigned emit_sz = 0;
 
-         switch (vinfo->emit[i]) {
+         switch (vinfo->attrib[i].emit) {
          case EMIT_4F:
             emit_sz = 4 * sizeof(float);
             break;
@@ -161,8 +161,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
           * numbers, not to positions in the hw vertex description --
           * that's handled by the output_offset field.
           */
-         fse->key.element[i].out.format = vinfo->emit[i];
-         fse->key.element[i].out.vs_output = vinfo->src_index[i];
+         fse->key.element[i].out.format = vinfo->attrib[i].emit;
+         fse->key.element[i].out.vs_output = vinfo->attrib[i].src_index;
          fse->key.element[i].out.offset = dst_offset;
       
          dst_offset += emit_sz;
diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c
index 1446f785c5..3214213e44 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.c
+++ b/src/gallium/auxiliary/draw/draw_vertex.c
@@ -49,7 +49,7 @@ draw_compute_vertex_size(struct vertex_info *vinfo)
 
    vinfo->size = 0;
    for (i = 0; i < vinfo->num_attribs; i++) {
-      switch (vinfo->emit[i]) {
+      switch (vinfo->attrib[i].emit) {
       case EMIT_OMIT:
          break;
       case EMIT_4UB:
@@ -81,8 +81,8 @@ draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
    unsigned i, j;
 
    for (i = 0; i < vinfo->num_attribs; i++) {
-      j = vinfo->src_index[i];
-      switch (vinfo->emit[i]) {
+      j = vinfo->attrib[i].src_index;
+      switch (vinfo->attrib[i].emit) {
       case EMIT_OMIT:
          debug_printf("EMIT_OMIT:");
          break;
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index 16c65c4317..a943607d7e 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -75,12 +75,41 @@ struct vertex_info
 {
    uint num_attribs;
    uint hwfmt[4];      /**< hardware format info for this format */
-   enum interp_mode interp_mode[PIPE_MAX_SHADER_INPUTS];
-   enum attrib_emit emit[PIPE_MAX_SHADER_INPUTS];   /**< EMIT_x */
-   uint src_index[PIPE_MAX_SHADER_INPUTS]; /**< map to post-xform attribs */
    uint size;          /**< total vertex size in dwords */
+   
+   /* Keep this small and at the end of the struct to allow quick
+    * memcmp() comparisons.
+    */
+   struct {
+      ubyte interp_mode:4;      /**< INTERP_x */
+      ubyte emit:4;             /**< EMIT_x */
+      ubyte src_index;          /**< map to post-xform attribs */
+   } attrib[PIPE_MAX_SHADER_INPUTS];
 };
 
+static INLINE int
+draw_vinfo_size( const struct vertex_info *a )
+{
+   return ((const char *)&a->attrib[a->num_attribs] -
+           (const char *)a);
+}
+
+static INLINE int
+draw_vinfo_compare( const struct vertex_info *a,
+                    const struct vertex_info *b )
+{
+   unsigned sizea = draw_vinfo_size( a );
+   return memcmp( a, b, sizea );
+}
+
+static INLINE void
+draw_vinfo_copy( struct vertex_info *dst,
+                 const struct vertex_info *src )
+{
+   unsigned size = draw_vinfo_size( src );
+   memcpy( dst, src, size );
+}
+
 
 
 /**
@@ -91,14 +120,15 @@ struct vertex_info
  */
 static INLINE uint
 draw_emit_vertex_attr(struct vertex_info *vinfo,
-                      enum attrib_emit emit, enum interp_mode interp,
+                      enum attrib_emit emit, 
+                      enum interp_mode interp, /* only used by softpipe??? */
                       uint src_index)
 {
    const uint n = vinfo->num_attribs;
    assert(n < PIPE_MAX_SHADER_INPUTS);
-   vinfo->emit[n] = emit;
-   vinfo->interp_mode[n] = interp;
-   vinfo->src_index[n] = src_index;
+   vinfo->attrib[n].emit = emit;
+   vinfo->attrib[n].interp_mode = interp;
+   vinfo->attrib[n].src_index = src_index;
    vinfo->num_attribs++;
    return n;
 }
diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c
index b20f3c4963..cb85e1734e 100644
--- a/src/gallium/auxiliary/gallivm/soabuiltins.c
+++ b/src/gallium/auxiliary/gallivm/soabuiltins.c
@@ -167,6 +167,7 @@ void min(float4 *res,
    res[3] = minvec(tmp0w, tmp1w);
 }
 
+
 void max(float4 *res,
          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
          float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index a5abbcde49..99ee74cf14 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -371,7 +371,11 @@ void x86_jcc( struct x86_function *p,
    DUMP_I(cc);
    
    if (offset < 0) {
-      assert(p->csr - p->store > -offset);
+      /*assert(p->csr - p->store > -offset);*/
+      if (p->csr - p->store <= -offset) {
+         /* probably out of memory (using the error_overflow buffer) */
+         return;
+      }
    }
 
    if (offset <= 127 && offset >= -128) {
@@ -699,6 +703,18 @@ void sse_prefetch1( struct x86_function *p, struct x86_reg ptr)
    emit_modrm_noreg(p, 2, ptr);
 }
 
+void sse_movntps( struct x86_function *p, 
+                  struct x86_reg dst,
+                  struct x86_reg src)
+{
+   DUMP_RR( dst, src );
+
+   assert(dst.mod != mod_REG);
+   assert(src.mod == mod_REG);
+   emit_2ub(p, 0x0f, 0x2b);
+   emit_modrm(p, src, dst);
+}
+
 
 
 
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 86091e7f6b..1b5eaaca85 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -190,6 +190,8 @@ void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
 void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
 void sse_prefetch1( struct x86_function *p, struct x86_reg ptr);
 
+void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
+
 void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c
index d194c2fb15..8f1f58b2dd 100644
--- a/src/gallium/drivers/i915simple/i915_prim_emit.c
+++ b/src/gallium/drivers/i915simple/i915_prim_emit.c
@@ -77,9 +77,9 @@ emit_hw_vertex( struct i915_context *i915,
    assert(!i915->dirty);
 
    for (i = 0; i < vinfo->num_attribs; i++) {
-      const uint j = vinfo->src_index[i];
+      const uint j = vinfo->attrib[i].src_index;
       const float *attrib = vertex->data[j];
-      switch (vinfo->emit[i]) {
+      switch (vinfo->attrib[i].emit) {
       case EMIT_1F:
          OUT_BATCH( fui(attrib[0]) );
          count++;
diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c
index 488615067c..178d4e8781 100644
--- a/src/gallium/drivers/i915simple/i915_state_derived.c
+++ b/src/gallium/drivers/i915simple/i915_state_derived.c
@@ -88,12 +88,12 @@ static void calculate_vertex_layout( struct i915_context *i915 )
    if (needW) {
       draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
       vinfo.hwfmt[0] |= S4_VFMT_XYZW;
-      vinfo.emit[0] = EMIT_4F;
+      vinfo.attrib[0].emit = EMIT_4F;
    }
    else {
       draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src);
       vinfo.hwfmt[0] |= S4_VFMT_XYZ;
-      vinfo.emit[0] = EMIT_3F;
+      vinfo.attrib[0].emit = EMIT_3F;
    }
 
    /* hardware point size */
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index bc8263c33e..13d8017393 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -773,10 +773,10 @@ static void setup_tri_coefficients( struct setup_context *setup )
    /* setup interpolation for all the remaining attributes:
     */
    for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
-      const uint vertSlot = vinfo->src_index[fragSlot];
+      const uint vertSlot = vinfo->attrib[fragSlot].src_index;
       uint j;
 
-      switch (vinfo->interp_mode[fragSlot]) {
+      switch (vinfo->attrib[fragSlot].interp_mode) {
       case INTERP_CONSTANT:
          for (j = 0; j < NUM_CHANNELS; j++)
             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
@@ -1084,10 +1084,10 @@ setup_line_coefficients(struct setup_context *setup,
    /* setup interpolation for all the remaining attributes:
     */
    for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
-      const uint vertSlot = vinfo->src_index[fragSlot];
+      const uint vertSlot = vinfo->attrib[fragSlot].src_index;
       uint j;
 
-      switch (vinfo->interp_mode[fragSlot]) {
+      switch (vinfo->attrib[fragSlot].interp_mode) {
       case INTERP_CONSTANT:
          for (j = 0; j < NUM_CHANNELS; j++)
             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
@@ -1331,10 +1331,10 @@ setup_point( struct setup_context *setup,
    const_coeff(setup, &setup->posCoef, 0, 3);
 
    for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
-      const uint vertSlot = vinfo->src_index[fragSlot];
+      const uint vertSlot = vinfo->attrib[fragSlot].src_index;
       uint j;
 
-      switch (vinfo->interp_mode[fragSlot]) {
+      switch (vinfo->attrib[fragSlot].interp_mode) {
       case INTERP_CONSTANT:
          /* fall-through */
       case INTERP_LINEAR:
diff --git a/src/mesa/main/api_exec.c b/src/mesa/main/api_exec.c
index 0c3c9c4de4..bae3bf11cb 100644
--- a/src/mesa/main/api_exec.c
+++ b/src/mesa/main/api_exec.c
@@ -58,7 +58,7 @@
 #include "colortab.h"
 #endif
 #include "context.h"
-#if FEATURE_convolution
+#if FEATURE_convolve
 #include "convolve.h"
 #endif
 #include "depth.h"
@@ -402,7 +402,7 @@ _mesa_init_exec_table(struct _glapi_table *exec)
    SET_GetColorTableParameteriv(exec, _mesa_GetColorTableParameteriv);
 #endif
 
-#if FEATURE_convolution
+#if FEATURE_convolve
    SET_ConvolutionFilter1D(exec, _mesa_ConvolutionFilter1D);
    SET_ConvolutionFilter2D(exec, _mesa_ConvolutionFilter2D);
    SET_ConvolutionParameterf(exec, _mesa_ConvolutionParameterf);
diff --git a/src/mesa/main/mfeatures.h b/src/mesa/main/mfeatures.h
index ed78f57edf..3819da3d68 100644
--- a/src/mesa/main/mfeatures.h
+++ b/src/mesa/main/mfeatures.h
@@ -39,7 +39,7 @@
 #define FEATURE_accum  _HAVE_FULL_GL
 #define FEATURE_attrib_stack  _HAVE_FULL_GL
 #define FEATURE_colortable  _HAVE_FULL_GL
-#define FEATURE_convolution  _HAVE_FULL_GL
+#define FEATURE_convolve  _HAVE_FULL_GL
 #define FEATURE_dispatch  _HAVE_FULL_GL
 #define FEATURE_dlist  _HAVE_FULL_GL
 #define FEATURE_draw_read_buffer  _HAVE_FULL_GL
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 8e4f6a2e66..8bf6858c1e 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2722,6 +2722,7 @@ struct gl_matrix_stack
 #define _NEW_MULTISAMPLE        0x2000000  /**< __GLcontextRec::Multisample */
 #define _NEW_TRACK_MATRIX       0x4000000  /**< __GLcontextRec::VertexProgram */
 #define _NEW_PROGRAM            0x8000000  /**< __GLcontextRec::VertexProgram */
+#define _NEW_CURRENT_ATTRIB     0x10000000  /**< __GLcontextRec::Current */
 #define _NEW_ALL ~0
 /*@}*/
 
@@ -3049,6 +3050,8 @@ struct __GLcontextRec
    GLenum RenderMode;        /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */
    GLbitfield NewState;      /**< bitwise-or of _NEW_* flags */
 
+   GLbitfield varying_vp_inputs;  /**< mask of VERT_BIT_* flags */
+
    /** \name Derived state */
    /*@{*/
    /** Bitwise-or of DD_* flags.  Note that this bitfield may be used before
diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
index a1e32e70ba..2d06030030 100644
--- a/src/mesa/main/queryobj.c
+++ b/src/mesa/main/queryobj.c
@@ -95,7 +95,7 @@ _mesa_wait_query(GLcontext *ctx, struct gl_query_object *q)
  * XXX maybe add Delete() method to gl_query_object class and call that instead
  */
 void
-_mesa_delete_query(struct gl_query_object *q)
+_mesa_delete_query(GLcontext *ctx, struct gl_query_object *q)
 {
    _mesa_free(q);
 }
diff --git a/src/mesa/main/queryobj.h b/src/mesa/main/queryobj.h
index c05a1f3da8..9a9774641b 100644
--- a/src/mesa/main/queryobj.h
+++ b/src/mesa/main/queryobj.h
@@ -37,7 +37,7 @@ extern void
 _mesa_free_query_data(GLcontext *ctx);
 
 extern void
-_mesa_delete_query(struct gl_query_object *q);
+_mesa_delete_query(GLcontext *ctx, struct gl_query_object *q);
 
 extern void
 _mesa_begin_query(GLcontext *ctx, struct gl_query_object *q);
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index 5913019bc1..7e44310d8d 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -447,6 +447,9 @@ _mesa_update_state_locked( GLcontext *ctx )
    GLbitfield new_state = ctx->NewState;
    GLbitfield prog_flags = _NEW_PROGRAM;
 
+   if (new_state == _NEW_CURRENT_ATTRIB) 
+      goto out;
+
    if (MESA_VERBOSE & VERBOSE_STATE)
       _mesa_print_state("_mesa_update_state", new_state);
 
@@ -510,7 +513,8 @@ _mesa_update_state_locked( GLcontext *ctx )
       _mesa_update_tnl_spaces( ctx, new_state );
 
    if (ctx->FragmentProgram._MaintainTexEnvProgram) {
-      prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
+      prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE_MATRIX | _NEW_LIGHT |
+                     _NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
    }
    if (ctx->VertexProgram._MaintainTnlProgram) {
       prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
@@ -532,6 +536,7 @@ _mesa_update_state_locked( GLcontext *ctx )
     * Set ctx->NewState to zero to avoid recursion if
     * Driver.UpdateState() has to call FLUSH_VERTICES().  (fixed?)
     */
+ out:
    new_state = ctx->NewState;
    ctx->NewState = 0;
    ctx->Driver.UpdateState(ctx, new_state);
@@ -548,3 +553,38 @@ _mesa_update_state( GLcontext *ctx )
    _mesa_update_state_locked(ctx);
    _mesa_unlock_context_textures(ctx);
 }
+
+
+
+
+/* Want to figure out which fragment program inputs are actually
+ * constant/current values from ctx->Current.  These should be
+ * referenced as a tracked state variable rather than a fragment
+ * program input, to save the overhead of putting a constant value in
+ * every submitted vertex, transferring it to hardware, interpolating
+ * it across the triangle, etc...
+ *
+ * When there is a VP bound, just use vp->outputs.  But when we're
+ * generating vp from fixed function state, basically want to
+ * calculate:
+ *
+ * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) | 
+ *                 potential_vp_outputs )
+ *
+ * Where potential_vp_outputs is calculated by looking at enabled
+ * texgen, etc.
+ * 
+ * The generated fragment program should then only declare inputs that
+ * may vary or otherwise differ from the ctx->Current values.
+ * Otherwise, the fp should track them as state values instead.
+ */
+void
+_mesa_set_varying_vp_inputs( GLcontext *ctx,
+                             GLbitfield varying_inputs )
+{
+   if (ctx->varying_vp_inputs != varying_inputs) {
+      ctx->varying_vp_inputs = varying_inputs;
+      ctx->NewState |= _NEW_ARRAY;
+      //_mesa_printf("%s %x\n", __FUNCTION__, varying_inputs);
+   }
+}
diff --git a/src/mesa/main/state.h b/src/mesa/main/state.h
index bb7cb8f32a..79f2f6beb0 100644
--- a/src/mesa/main/state.h
+++ b/src/mesa/main/state.h
@@ -37,5 +37,8 @@ _mesa_update_state( GLcontext *ctx );
 extern void
 _mesa_update_state_locked( GLcontext *ctx );
 
+void
+_mesa_set_varying_vp_inputs( GLcontext *ctx,
+                             GLbitfield varying_inputs );
 
 #endif
diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c
index f6bbbcfaed..c23173014d 100644
--- a/src/mesa/main/texenvprogram.c
+++ b/src/mesa/main/texenvprogram.c
@@ -55,15 +55,17 @@ struct texenvprog_cache_item
 #define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM)
 
 struct mode_opt {
-   GLuint Source:4;
-   GLuint Operand:3;
+   GLubyte Source:4;
+   GLubyte Operand:3;
 };
 
 struct state_key {
-   GLbitfield enabled_units;
+   GLuint nr_enabled_units:8;
+   GLuint enabled_units:8;
    GLuint separate_specular:1;
    GLuint fog_enabled:1;
    GLuint fog_mode:2;
+   GLuint inputs_available:12;
 
    struct {
       GLuint enabled:1;
@@ -74,10 +76,10 @@ struct state_key {
 
       GLuint NumArgsRGB:2;
       GLuint ModeRGB:4;
-      struct mode_opt OptRGB[3];
-
       GLuint NumArgsA:2;
       GLuint ModeA:4;
+
+      struct mode_opt OptRGB[3];
       struct mode_opt OptA[3];
    } unit[8];
 };
@@ -199,6 +201,66 @@ static GLuint translate_tex_src_bit( GLbitfield bit )
    }
 }
 
+#define VERT_BIT_TEX_ANY    (0xff << VERT_ATTRIB_TEX0)
+#define VERT_RESULT_TEX_ANY (0xff << VERT_RESULT_TEX0)
+
+/**
+ * Identify all possible varying inputs.  The fragment program will
+ * never reference non-varying inputs, but will track them via state
+ * constants instead.
+ *
+ * This function figures out all the inputs that the fragment program
+ * has access to.  The bitmask is later reduced to just those which
+ * are actually referenced.
+ */
+static GLbitfield get_fp_input_mask( GLcontext *ctx )
+{
+   GLbitfield fp_inputs = 0x0;
+
+   if (!ctx->VertexProgram._Enabled ||
+       !ctx->VertexProgram._Current) {
+
+      /* Fixed function logic */
+      GLbitfield varying_inputs = ctx->varying_vp_inputs;
+
+      /* First look at what values may be computed by the generated
+       * vertex program:
+       */
+      if (ctx->Light.Enabled) {
+         fp_inputs |= FRAG_BIT_COL0;
+
+         if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+            fp_inputs |= FRAG_BIT_COL1;
+      }
+
+      fp_inputs |= (ctx->Texture._TexGenEnabled |
+                    ctx->Texture._TexMatEnabled) << FRAG_ATTRIB_TEX0;
+
+      /* Then look at what might be varying as a result of enabled
+       * arrays, etc:
+       */
+      if (varying_inputs & VERT_BIT_COLOR0) fp_inputs |= FRAG_BIT_COL0;
+      if (varying_inputs & VERT_BIT_COLOR1) fp_inputs |= FRAG_BIT_COL1;
+
+      fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0) 
+                    << FRAG_ATTRIB_TEX0);
+
+   }
+   else {
+      /* calculate from vp->outputs */
+      GLbitfield vp_outputs = ctx->VertexProgram._Current->Base.OutputsWritten;
+
+      if (vp_outputs & (1 << VERT_RESULT_COL0)) fp_inputs |= FRAG_BIT_COL0;
+      if (vp_outputs & (1 << VERT_RESULT_COL1)) fp_inputs |= FRAG_BIT_COL1;
+
+      fp_inputs |= (((vp_outputs & VERT_RESULT_TEX_ANY) >> VERT_RESULT_TEX0) 
+                    << FRAG_ATTRIB_TEX0);
+   }
+   
+   return fp_inputs;
+}
+
+
 /**
  * Examine current texture environment state and generate a unique
  * key to identify it.
@@ -206,7 +268,9 @@ static GLuint translate_tex_src_bit( GLbitfield bit )
 static void make_state_key( GLcontext *ctx,  struct state_key *key )
 {
    GLuint i, j;
-	
+   GLbitfield inputs_referenced = FRAG_BIT_COL0;
+   GLbitfield inputs_available = get_fp_input_mask( ctx );
+
    memset(key, 0, sizeof(*key));
 
    for (i=0;i<MAX_TEXTURE_UNITS;i++) {
@@ -217,6 +281,8 @@ static void make_state_key( GLcontext *ctx,  struct state_key *key )
 
       key->unit[i].enabled = 1;
       key->enabled_units |= (1<<i);
+      key->nr_enabled_units = i+1;
+      inputs_referenced |= FRAG_BIT_TEX(i);
 
       key->unit[i].source_index = 
 	 translate_tex_src_bit(texUnit->_ReallyEnabled);		
@@ -245,16 +311,22 @@ static void make_state_key( GLcontext *ctx,  struct state_key *key )
       }
    }
 	
-   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
       key->separate_specular = 1;
+      inputs_referenced |= FRAG_BIT_COL1;
+   }
 
    if (ctx->Fog.Enabled) {
       key->fog_enabled = 1;
       key->fog_mode = translate_fog_mode(ctx->Fog.Mode);
+      inputs_referenced |= FRAG_BIT_FOGC; /* maybe */
    }
+
+   key->inputs_available = (inputs_available & inputs_referenced);
 }
 
-/* Use uregs to represent registers internally, translate to Mesa's
+/**
+ * Use uregs to represent registers internally, translate to Mesa's
  * expected formats on emit.  
  *
  * NOTE: These are passed by value extensively in this file rather
@@ -287,16 +359,16 @@ static const struct ureg undef = {
 };
 
 
-/* State used to build the fragment program:
+/** State used to build the fragment program:
  */
 struct texenv_fragment_program {
    struct gl_fragment_program *program;
    GLcontext *ctx;
    struct state_key *state;
 
-   GLbitfield alu_temps;	/* Track texture indirections, see spec. */
-   GLbitfield temps_output;	/* Track texture indirections, see spec. */
-   GLbitfield temp_in_use;	/* Tracks temporary regs which are in use. */
+   GLbitfield alu_temps;	/**< Track texture indirections, see spec. */
+   GLbitfield temps_output;	/**< Track texture indirections, see spec. */
+   GLbitfield temp_in_use;	/**< Tracks temporary regs which are in use. */
    GLboolean error;
 
    struct ureg src_texture[MAX_TEXTURE_UNITS];   
@@ -304,11 +376,11 @@ struct texenv_fragment_program {
     * else undef.
     */
 
-   struct ureg src_previous;	/* Reg containing color from previous 
+   struct ureg src_previous;	/**< Reg containing color from previous 
 				 * stage.  May need to be decl'd.
 				 */
 
-   GLuint last_tex_stage;	/* Number of last enabled texture unit */
+   GLuint last_tex_stage;	/**< Number of last enabled texture unit */
 
    struct ureg half;
    struct ureg one;
@@ -457,11 +529,29 @@ static struct ureg register_param5( struct texenv_fragment_program *p,
 #define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
 #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
 
+static GLuint frag_to_vert_attrib( GLuint attrib )
+{
+   switch (attrib) {
+   case FRAG_ATTRIB_COL0: return VERT_ATTRIB_COLOR0;
+   case FRAG_ATTRIB_COL1: return VERT_ATTRIB_COLOR1;
+   default:
+      assert(attrib >= FRAG_ATTRIB_TEX0);
+      assert(attrib <= FRAG_ATTRIB_TEX7);
+      return attrib - FRAG_ATTRIB_TEX0 + VERT_ATTRIB_TEX0;
+   }
+}
+
 
 static struct ureg register_input( struct texenv_fragment_program *p, GLuint input )
 {
-   p->program->Base.InputsRead |= (1 << input);
-   return make_ureg(PROGRAM_INPUT, input);
+   if (p->state->inputs_available & (1<<input)) {
+      p->program->Base.InputsRead |= (1 << input);
+      return make_ureg(PROGRAM_INPUT, input);
+   }
+   else {
+      GLuint idx = frag_to_vert_attrib( input );
+      return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, idx );
+   }
 }
 
 
diff --git a/src/mesa/shader/prog_cache.c b/src/mesa/shader/prog_cache.c
index 36a25377c5..9437e59613 100644
--- a/src/mesa/shader/prog_cache.c
+++ b/src/mesa/shader/prog_cache.c
@@ -44,6 +44,7 @@ struct cache_item
 struct gl_program_cache
 {
    struct cache_item **items;
+   struct cache_item *last;
    GLuint size, n_items;
 };
 
@@ -83,6 +84,8 @@ rehash(struct gl_program_cache *cache)
    struct cache_item *c, *next;
    GLuint size, i;
 
+   cache->last = NULL;
+
    size = cache->size * 3;
    items = (struct cache_item**) _mesa_malloc(size * sizeof(*items));
    _mesa_memset(items, 0, size * sizeof(*items));
@@ -105,6 +108,8 @@ clear_cache(GLcontext *ctx, struct gl_program_cache *cache)
 {
    struct cache_item *c, *next;
    GLuint i;
+   
+   cache->last = NULL;
 
    for (i = 0; i < cache->size; i++) {
       for (c = cache->items[i]; c; c = next) {
@@ -149,18 +154,26 @@ _mesa_delete_program_cache(GLcontext *ctx, struct gl_program_cache *cache)
 
 
 struct gl_program *
-_mesa_search_program_cache(const struct gl_program_cache *cache,
+_mesa_search_program_cache(struct gl_program_cache *cache,
                            const void *key, GLuint keysize)
 {
-   const GLuint hash = hash_key(key, keysize);
-   struct cache_item *c;
-
-   for (c = cache->items[hash % cache->size]; c; c = c->next) {
-      if (c->hash == hash && memcmp(c->key, key, keysize) == 0)
-	 return c->program;
+   if (cache->last && 
+       memcmp(cache->last->key, key, keysize) == 0) {
+      return cache->last->program;
    }
+   else {
+      const GLuint hash = hash_key(key, keysize);
+      struct cache_item *c;
+
+      for (c = cache->items[hash % cache->size]; c; c = c->next) {
+         if (c->hash == hash && memcmp(c->key, key, keysize) == 0) {
+            cache->last = c;
+            return c->program;
+         }
+      }
 
-   return NULL;
+      return NULL;
+   }
 }
 
 
diff --git a/src/mesa/shader/prog_cache.h b/src/mesa/shader/prog_cache.h
index a8c91fba01..4e1ccac03f 100644
--- a/src/mesa/shader/prog_cache.h
+++ b/src/mesa/shader/prog_cache.h
@@ -42,7 +42,7 @@ _mesa_delete_program_cache(GLcontext *ctx, struct gl_program_cache *pc);
 
 
 extern struct gl_program *
-_mesa_search_program_cache(const struct gl_program_cache *cache,
+_mesa_search_program_cache(struct gl_program_cache *cache,
                            const void *key, GLuint keysize);
 
 extern void
diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c
index d4e31207e8..34c4741350 100644
--- a/src/mesa/shader/prog_statevars.c
+++ b/src/mesa/shader/prog_statevars.c
@@ -395,6 +395,12 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
 
    case STATE_INTERNAL:
       switch (state[1]) {
+      case STATE_CURRENT_ATTRIB: {
+         const GLuint idx = (GLuint) state[2];
+         COPY_4V(value, ctx->Current.Attrib[idx]);
+         return;
+      }						  
+
       case STATE_NORMAL_SCALE:
          ASSIGN_4V(value, 
                    ctx->_ModelViewInvScale, 
@@ -501,6 +507,9 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
          }
          return;
 
+         /* XXX: make sure new tokens added here are also handled in the 
+          * _mesa_program_state_flags() switch, below.
+          */
       default:
          /* unknown state indexes are silently ignored
           *  should be handled by the driver.
@@ -574,11 +583,29 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH])
 
    case STATE_INTERNAL:
       switch (state[1]) {
+      case STATE_CURRENT_ATTRIB:
+         return _NEW_CURRENT_ATTRIB;
+
+      case STATE_NORMAL_SCALE:
+         return _NEW_MODELVIEW;
+
       case STATE_TEXRECT_SCALE:
       case STATE_SHADOW_AMBIENT:
 	 return _NEW_TEXTURE;
       case STATE_FOG_PARAMS_OPTIMIZED:
 	 return _NEW_FOG;
+      case STATE_LIGHT_SPOT_DIR_NORMALIZED:
+      case STATE_LIGHT_POSITION:
+      case STATE_LIGHT_POSITION_NORMALIZED:
+      case STATE_LIGHT_HALF_VECTOR:
+         return _NEW_LIGHT;
+
+      case STATE_PT_SCALE:
+      case STATE_PT_BIAS:
+      case STATE_PCM_SCALE:
+      case STATE_PCM_BIAS:
+         return _NEW_PIXEL;
+
       default:
          /* unknown state indexes are silently ignored and
          *  no flag set, since it is handled by the driver.
diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h
index 20643ca794..72e51f4031 100644
--- a/src/mesa/shader/prog_statevars.h
+++ b/src/mesa/shader/prog_statevars.h
@@ -104,6 +104,7 @@ typedef enum gl_state_index_ {
    STATE_LOCAL,
 
    STATE_INTERNAL,		/* Mesa additions */
+   STATE_CURRENT_ATTRIB,        /* ctx->Current vertex attrib value */
    STATE_NORMAL_SCALE,
    STATE_TEXRECT_SCALE,
    STATE_FOG_PARAMS_OPTIMIZED,  /* for faster fog calc */
diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c
index f120c20bdf..5d21cff672 100644
--- a/src/mesa/shader/program.c
+++ b/src/mesa/shader/program.c
@@ -686,17 +686,47 @@ _mesa_combine_programs(GLcontext *ctx,
 
    if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) {
       struct gl_fragment_program *fprogA, *fprogB, *newFprog;
+      GLbitfield progB_inputsRead = progB->InputsRead;
+      GLint progB_colorFile, progB_colorIndex;
+
       fprogA = (struct gl_fragment_program *) progA;
       fprogB = (struct gl_fragment_program *) progB;
       newFprog = (struct gl_fragment_program *) newProg;
 
       newFprog->UsesKill = fprogA->UsesKill || fprogB->UsesKill;
 
+      /* We'll do a search and replace for instances
+       * of progB_colorFile/progB_colorIndex below...
+       */
+      progB_colorFile = PROGRAM_INPUT;
+      progB_colorIndex = FRAG_ATTRIB_COL0;
+
+      /*
+       * The fragment program may get color from a state var rather than
+       * a fragment input (vertex output) if it's constant.
+       * See the texenvprogram.c code.
+       * So, search the program's parameter list now to see if the program
+       * gets color from a state var instead of a conventional fragment
+       * input register.
+       */
+      for (i = 0; i < progB->Parameters->NumParameters; i++) {
+         struct gl_program_parameter *p = &progB->Parameters->Parameters[i];
+         if (p->Type == PROGRAM_STATE_VAR &&
+             p->StateIndexes[0] == STATE_INTERNAL &&
+             p->StateIndexes[1] == STATE_CURRENT_ATTRIB &&
+             p->StateIndexes[2] == VERT_ATTRIB_COLOR0) {
+            progB_inputsRead |= FRAG_BIT_COL0;
+            progB_colorFile = PROGRAM_STATE_VAR;
+            progB_colorIndex = i;
+            break;
+         }
+      }
+
       /* Connect color outputs of fprogA to color inputs of fprogB, via a
        * new temporary register.
        */
       if ((progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) &&
-          (progB->InputsRead & (1 << FRAG_ATTRIB_COL0))) {
+          (progB_inputsRead & FRAG_BIT_COL0)) {
          GLint tempReg = _mesa_find_free_register(newProg, PROGRAM_TEMPORARY);
          if (tempReg < 0) {
             _mesa_problem(ctx, "No free temp regs found in "
@@ -707,13 +737,14 @@ _mesa_combine_programs(GLcontext *ctx,
          replace_registers(newInst, lenA,
                            PROGRAM_OUTPUT, FRAG_RESULT_COLR,
                            PROGRAM_TEMPORARY, tempReg);
-         /* replace reads from input.color[0] with tempReg */
+         /* replace reads from the input color with tempReg */
          replace_registers(newInst + lenA, lenB,
-                           PROGRAM_INPUT, FRAG_ATTRIB_COL0,
-                           PROGRAM_TEMPORARY, tempReg);
+                           progB_colorFile, progB_colorIndex, /* search for */
+                           PROGRAM_TEMPORARY, tempReg  /* replace with */ );
       }
 
-      inputsB = progB->InputsRead;
+      /* compute combined program's InputsRead */
+      inputsB = progB_inputsRead;
       if (progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) {
          inputsB &= ~(1 << FRAG_ATTRIB_COL0);
       }
diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c
index fc47896c24..5eef4ebe92 100644
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -215,6 +215,9 @@ static void update_raster_state( struct st_context *st )
          raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_NONE;
       }
    }
+
+   /* ST_NEW_VERTEX_PROGRAM
+    */
    if (vertProg) {
       if (vertProg->Base.Id == 0) {
          if (vertProg->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
@@ -277,7 +280,7 @@ const struct st_tracked_state st_update_rasterizer = {
        _NEW_POLYGON |
        _NEW_PROGRAM |
        _NEW_SCISSOR),      /* mesa state dependencies*/
-      0,                   /* state tracker dependencies */
+      ST_NEW_VERTEX_PROGRAM,  /* state tracker dependencies */
    },
    update_raster_state     /* update function */
 };
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index e545e00eb2..acaf1de882 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 #include "main/imports.h"
-#if FEATURE_convolution
+#if FEATURE_convolve
 #include "main/convolve.h"
 #endif
 #include "main/enums.h"
@@ -409,7 +409,7 @@ st_TexImage(GLcontext * ctx,
    stImage->face = _mesa_tex_target_to_face(target);
    stImage->level = level;
 
-#if FEATURE_convolution
+#if FEATURE_convolve
    if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
       _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
                                          &postConvHeight);
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index f9016923dc..61949a9388 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -526,7 +526,7 @@ st_draw_vbo(GLcontext *ctx,
       num_vbuffers = 1;
       num_velements = vp->num_inputs;
       if (num_velements == 0)
-	      num_vbuffers = 0;
+         num_vbuffers = 0;
    }
    else {
       /*printf("Draw non-interleaved\n");*/
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index fdb0c5a9a4..a6ce26ffed 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -143,29 +143,37 @@ static void vbo_exec_copy_to_current( struct vbo_exec_context *exec )
 
    for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
       if (exec->vtx.attrsz[i]) {
-	 GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
-
          /* Note: the exec->vtx.current[i] pointers point into the
           * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays.
           */
-	 COPY_CLEAN_4V(current, 
-		       exec->vtx.attrsz[i], 
-		       exec->vtx.attrptr[i]);
+	 GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
+         GLfloat tmp[4];
+
+         COPY_CLEAN_4V(tmp, 
+                       exec->vtx.attrsz[i], 
+                       exec->vtx.attrptr[i]);
+         
+         if (memcmp(current, tmp, sizeof(tmp)) != 0)
+         { 
+            memcpy(current, tmp, sizeof(tmp));
 
 	 
-	 /* Given that we explicitly state size here, there is no need
-	  * for the COPY_CLEAN above, could just copy 16 bytes and be
-	  * done.  The only problem is when Mesa accesses ctx->Current
-	  * directly.
-	  */
-	 vbo->currval[i].Size = exec->vtx.attrsz[i];
-
-	 /* This triggers rather too much recalculation of Mesa state
-	  * that doesn't get used (eg light positions).
-	  */
-	 if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT &&
-	     i <= VBO_ATTRIB_MAT_BACK_INDEXES)
-	    ctx->NewState |= _NEW_LIGHT;
+            /* Given that we explicitly state size here, there is no need
+             * for the COPY_CLEAN above, could just copy 16 bytes and be
+             * done.  The only problem is when Mesa accesses ctx->Current
+             * directly.
+             */
+            vbo->currval[i].Size = exec->vtx.attrsz[i];
+
+            /* This triggers rather too much recalculation of Mesa state
+             * that doesn't get used (eg light positions).
+             */
+            if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT &&
+                i <= VBO_ATTRIB_MAT_BACK_INDEXES)
+               ctx->NewState |= _NEW_LIGHT;
+            
+            ctx->NewState |= _NEW_CURRENT_ATTRIB;
+         }
       }
    }
 
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 0f9d8da356..8871e10cf6 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -127,6 +127,7 @@ static void recalculate_input_bindings( GLcontext *ctx )
    struct vbo_context *vbo = vbo_context(ctx);
    struct vbo_exec_context *exec = &vbo->exec;
    const struct gl_client_array **inputs = &exec->array.inputs[0];
+   GLbitfield const_inputs = 0x0;
    GLuint i;
 
    exec->array.program_mode = get_program_mode(ctx);
@@ -141,19 +142,24 @@ static void recalculate_input_bindings( GLcontext *ctx )
       for (i = 0; i <= VERT_ATTRIB_TEX7; i++) {
 	 if (exec->array.legacy_array[i]->Enabled)
 	    inputs[i] = exec->array.legacy_array[i];
-	 else
+	 else {
 	    inputs[i] = &vbo->legacy_currval[i];
+            const_inputs |= 1 << i;
+         }
       }
 
       for (i = 0; i < MAT_ATTRIB_MAX; i++) {
 	 inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i];
+         const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
       }
 
       /* Could use just about anything, just to fill in the empty
        * slots:
        */
-      for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++)
+      for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) {
 	 inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
+         const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
+      }
 
       break;
    case VP_NV:
@@ -166,15 +172,19 @@ static void recalculate_input_bindings( GLcontext *ctx )
 	    inputs[i] = exec->array.generic_array[i];
 	 else if (exec->array.legacy_array[i]->Enabled)
 	    inputs[i] = exec->array.legacy_array[i];
-	 else
+	 else {
 	    inputs[i] = &vbo->legacy_currval[i];
+            const_inputs |= 1 << i;
+         }
       }
 
       /* Could use just about anything, just to fill in the empty
        * slots:
        */
-      for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++)
+      for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
 	 inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0];
+         const_inputs |= 1 << i;
+      }
 
       break;
    case VP_ARB:
@@ -189,25 +199,34 @@ static void recalculate_input_bindings( GLcontext *ctx )
 	 inputs[0] = exec->array.generic_array[0];
       else if (exec->array.legacy_array[0]->Enabled)
 	 inputs[0] = exec->array.legacy_array[0];
-      else
+      else {
 	 inputs[0] = &vbo->legacy_currval[0];
+         const_inputs |= 1 << 0;
+      }
 
 
       for (i = 1; i <= VERT_ATTRIB_TEX7; i++) {
 	 if (exec->array.legacy_array[i]->Enabled)
 	    inputs[i] = exec->array.legacy_array[i];
-	 else
+	 else {
 	    inputs[i] = &vbo->legacy_currval[i];
+            const_inputs |= 1 << i;
+         }
       }
 
       for (i = 0; i < 16; i++) {
 	 if (exec->array.generic_array[i]->Enabled)
 	    inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i];
-	 else
+	 else {
 	    inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
+            const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
+         }
+
       }
       break;
    }
+
+   _mesa_set_varying_vp_inputs( ctx, ~const_inputs );
 }
 
 static void bind_arrays( GLcontext *ctx )
@@ -257,6 +276,11 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
 
    bind_arrays( ctx );
 
+   /* Again...
+    */
+   if (ctx->NewState)
+      _mesa_update_state( ctx );
+
    prim[0].begin = 1;
    prim[0].end = 1;
    prim[0].weak = 0;
@@ -297,6 +321,9 @@ vbo_exec_DrawRangeElements(GLenum mode,
 
    bind_arrays( ctx );
 
+   if (ctx->NewState)
+      _mesa_update_state( ctx );
+
    ib.count = count;
    ib.type = type; 
    ib.obj = ctx->Array.ElementArrayBufferObj;
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 92356ba977..5bf3d836db 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -150,6 +150,7 @@ static void vbo_exec_bind_arrays( GLcontext *ctx )
    GLubyte *data = exec->vtx.buffer_map;
    const GLuint *map;
    GLuint attr;
+   GLbitfield varying_inputs = 0x0;
 
    /* Install the default (ie Current) attributes first, then overlay
     * all active ones.
@@ -211,8 +212,11 @@ static void vbo_exec_bind_arrays( GLcontext *ctx )
 	 arrays[attr]._MaxElement = count; /* ??? */
 
 	 data += exec->vtx.attrsz[src] * sizeof(GLfloat);
+         varying_inputs |= 1<<attr;
       }
    }
+
+   _mesa_set_varying_vp_inputs( ctx, varying_inputs );
 }
 
 
@@ -242,6 +246,9 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec )
 	  */
 	 vbo_exec_bind_arrays( ctx );
 
+         if (ctx->NewState)
+            _mesa_update_state( ctx );
+
          /* if using a real VBO, unmap it before drawing */
          if (exec->vtx.bufferobj->Name) {
             ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index ed82f09958..0488c5d718 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -64,18 +64,26 @@ static void _playback_copy_to_current( GLcontext *ctx,
    for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
       if (node->attrsz[i]) {
 	 GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
+         GLfloat tmp[4];
 
-	 COPY_CLEAN_4V(current, 
-		       node->attrsz[i], 
-		       data);
+         COPY_CLEAN_4V(tmp, 
+                       node->attrsz[i], 
+                       data);
+         
+         if (memcmp(current, tmp, 4 * sizeof(GLfloat)) != 0)
+         {
+            memcpy(current, tmp, 4 * sizeof(GLfloat));
 
-	 vbo->currval[i].Size = node->attrsz[i];
+            vbo->currval[i].Size = node->attrsz[i];
 
-	 data += node->attrsz[i];
+            if (i >= VBO_ATTRIB_FIRST_MATERIAL &&
+                i <= VBO_ATTRIB_LAST_MATERIAL)
+               ctx->NewState |= _NEW_LIGHT;
+
+            ctx->NewState |= _NEW_CURRENT_ATTRIB;
+         }
 
-	 if (i >= VBO_ATTRIB_FIRST_MATERIAL &&
-	     i <= VBO_ATTRIB_LAST_MATERIAL)
-	    ctx->NewState |= _NEW_LIGHT;
+	 data += node->attrsz[i];
       }
    }
 
@@ -110,6 +118,7 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
    GLuint data = node->buffer_offset;
    const GLuint *map;
    GLuint attr;
+   GLbitfield varying_inputs = 0x0;
 
    /* Install the default (ie Current) attributes first, then overlay
     * all active ones.
@@ -159,8 +168,11 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
 	 assert(arrays[attr].BufferObj->Name);
 
 	 data += node->attrsz[src] * sizeof(GLfloat);
+         varying_inputs |= 1<<attr;
       }
    }
+
+   _mesa_set_varying_vp_inputs( ctx, varying_inputs );
 }
 
 static void vbo_save_loopback_vertex_list( GLcontext *ctx,
@@ -229,6 +241,11 @@ void vbo_save_playback_vertex_list( GLcontext *ctx, void *data )
 
       vbo_bind_vertex_list( ctx, node );
 
+      /* Again...
+       */
+      if (ctx->NewState)
+	 _mesa_update_state( ctx );
+
       vbo_context(ctx)->draw_prims( ctx, 
 				    save->inputs, 
 				    node->prim,
author	Keith Whitwell <keith@tungstengraphics.com>	2008-10-10 15:19:05 +0100
committer	Keith Whitwell <keith@tungstengraphics.com>	2008-10-10 15:23:36 +0100
commit	d7f1cb5b5a134b63227d5746a2dd1f05597c5c2f (patch)
tree	7446e243da8b50f6cb323b7917bdb94fbd528368
parent	7ac1fc77661faf0897507fef0437fe69d0ba53ac (diff)
parent	f7556fdd40ed2719beaba271eee4a7551e212ad1 (diff)